diff options
author | Timothy Pearson <tpearson@raptorengineering.com> | 2017-08-23 14:45:25 -0500 |
---|---|---|
committer | Timothy Pearson <tpearson@raptorengineering.com> | 2017-08-23 14:45:25 -0500 |
commit | fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204 (patch) | |
tree | 22962a4387943edc841c72a4e636a068c66d58fd /arch/um | |
download | ast2050-linux-kernel-fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204.zip ast2050-linux-kernel-fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204.tar.gz |
Initial import of modified Linux 2.6.28 tree
Original upstream URL:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git | branch linux-2.6.28.y
Diffstat (limited to 'arch/um')
344 files changed, 36453 insertions, 0 deletions
diff --git a/arch/um/Kconfig.char b/arch/um/Kconfig.char new file mode 100644 index 0000000..70dabd1 --- /dev/null +++ b/arch/um/Kconfig.char @@ -0,0 +1,239 @@ + +menu "Character Devices" + +config STDERR_CONSOLE + bool "stderr console" + default y + help + console driver which dumps all printk messages to stderr. + +config STDIO_CONSOLE + bool + default y + +config SSL + bool "Virtual serial line" + help + The User-Mode Linux environment allows you to create virtual serial + lines on the UML that are usually made to show up on the host as + ttys or ptys. + + See <http://user-mode-linux.sourceforge.net/old/input.html> for more + information and command line examples of how to use this facility. + + Unless you have a specific reason for disabling this, say Y. + +config NULL_CHAN + bool "null channel support" + help + This option enables support for attaching UML consoles and serial + lines to a device similar to /dev/null. Data written to it disappears + and there is never any data to be read. + +config PORT_CHAN + bool "port channel support" + help + This option enables support for attaching UML consoles and serial + lines to host portals. They may be accessed with 'telnet <host> + <port number>'. Any number of consoles and serial lines may be + attached to a single portal, although what UML device you get when + you telnet to that portal will be unpredictable. + It is safe to say 'Y' here. + +config PTY_CHAN + bool "pty channel support" + help + This option enables support for attaching UML consoles and serial + lines to host pseudo-terminals. Access to both traditional + pseudo-terminals (/dev/pty*) and pts pseudo-terminals are controlled + with this option. The assignment of UML devices to host devices + will be announced in the kernel message log. + It is safe to say 'Y' here. + +config TTY_CHAN + bool "tty channel support" + help + This option enables support for attaching UML consoles and serial + lines to host terminals. 
Access to both virtual consoles + (/dev/tty*) and the slave side of pseudo-terminals (/dev/ttyp* and + /dev/pts/*) are controlled by this option. + It is safe to say 'Y' here. + +config XTERM_CHAN + bool "xterm channel support" + help + This option enables support for attaching UML consoles and serial + lines to xterms. Each UML device so assigned will be brought up in + its own xterm. + It is safe to say 'Y' here. + +config NOCONFIG_CHAN + bool + default !(XTERM_CHAN && TTY_CHAN && PTY_CHAN && PORT_CHAN && NULL_CHAN) + +config CON_ZERO_CHAN + string "Default main console channel initialization" + default "fd:0,fd:1" + help + This is the string describing the channel to which the main console + will be attached by default. This value can be overridden from the + command line. The default value is "fd:0,fd:1", which attaches the + main console to stdin and stdout. + It is safe to leave this unchanged. + +config CON_CHAN + string "Default console channel initialization" + default "xterm" + help + This is the string describing the channel to which all consoles + except the main console will be attached by default. This value can + be overridden from the command line. The default value is "xterm", + which brings them up in xterms. + It is safe to leave this unchanged, although you may wish to change + this if you expect the UML that you build to be run in environments + which don't have X or xterm available. + +config SSL_CHAN + string "Default serial line channel initialization" + default "pty" + help + This is the string describing the channel to which the serial lines + will be attached by default. This value can be overridden from the + command line. The default value is "pty", which attaches them to + traditional pseudo-terminals. + It is safe to leave this unchanged, although you may wish to change + this if you expect the UML that you build to be run in environments + which don't have a set of /dev/pty* devices. 
+ +config UNIX98_PTYS + bool "Unix98 PTY support" + help + A pseudo terminal (PTY) is a software device consisting of two + halves: a master and a slave. The slave device behaves identical to + a physical terminal; the master device is used by a process to + read data from and write data to the slave, thereby emulating a + terminal. Typical programs for the master side are telnet servers + and xterms. + + Linux has traditionally used the BSD-like names /dev/ptyxx for + masters and /dev/ttyxx for slaves of pseudo terminals. This scheme + has a number of problems. The GNU C library glibc 2.1 and later, + however, supports the Unix98 naming standard: in order to acquire a + pseudo terminal, a process opens /dev/ptmx; the number of the pseudo + terminal is then made available to the process and the pseudo + terminal slave can be accessed as /dev/pts/<number>. What was + traditionally /dev/ttyp2 will then be /dev/pts/2, for example. + + All modern Linux systems use the Unix98 ptys. Say Y unless + you're on an embedded system and want to conserve memory. + +config LEGACY_PTYS + bool "Legacy (BSD) PTY support" + default y + help + A pseudo terminal (PTY) is a software device consisting of two + halves: a master and a slave. The slave device behaves identical to + a physical terminal; the master device is used by a process to + read data from and write data to the slave, thereby emulating a + terminal. Typical programs for the master side are telnet servers + and xterms. + + Linux has traditionally used the BSD-like names /dev/ptyxx + for masters and /dev/ttyxx for slaves of pseudo + terminals. This scheme has a number of problems, including + security. This option enables these legacy devices; on most + systems, it is safe to say N. + +config RAW_DRIVER + tristate "RAW driver (/dev/raw/rawN)" + depends on BLOCK + help + The raw driver permits block devices to be bound to /dev/raw/rawN. + Once bound, I/O against /dev/raw/rawN uses efficient zero-copy I/O. 
+ See the raw(8) manpage for more details. + + Applications should preferably open the device (eg /dev/hda1) + with the O_DIRECT flag. + +config MAX_RAW_DEVS + int "Maximum number of RAW devices to support (1-8192)" + depends on RAW_DRIVER + default "256" + help + The maximum number of RAW devices that are supported. + Default is 256. Increase this number in case you need lots of + raw devices. + +config LEGACY_PTY_COUNT + int "Maximum number of legacy PTY in use" + depends on LEGACY_PTYS + default "256" + help + The maximum number of legacy PTYs that can be used at any one time. + The default is 256, and should be more than enough. Embedded + systems may want to reduce this to save memory. + + When not in use, each legacy PTY occupies 12 bytes on 32-bit + architectures and 24 bytes on 64-bit architectures. + +config WATCHDOG + bool "Watchdog Timer Support" + +config WATCHDOG_NOWAYOUT + bool "Disable watchdog shutdown on close" + depends on WATCHDOG + +config SOFT_WATCHDOG + tristate "Software Watchdog" + depends on WATCHDOG + +config UML_WATCHDOG + tristate "UML watchdog" + depends on WATCHDOG + +config UML_SOUND + tristate "Sound support" + help + This option enables UML sound support. If enabled, it will pull in + soundcore and the UML hostaudio relay, which acts as an intermediary + between the host's dsp and mixer devices and the UML sound system. + It is safe to say 'Y' here. + +config SOUND + tristate + default UML_SOUND + +config SOUND_OSS_CORE + bool + default UML_SOUND + +config HOSTAUDIO + tristate + default UML_SOUND + +#It is selected elsewhere, so kconfig would warn without this. +config HW_RANDOM + tristate + default n + +config UML_RANDOM + tristate "Hardware random number generator" + help + This option enables UML's "hardware" random number generator. It + attaches itself to the host's /dev/random, supplying as much entropy + as the host has, rather than the small amount the UML gets from its + own drivers. 
It registers itself as a standard hardware random number + generator, major 10, minor 183, and the canonical device name is + /dev/hwrng. + The way to make use of this is to install the rng-tools package + (check your distro, or download from + http://sourceforge.net/projects/gkernel/). rngd periodically reads + /dev/hwrng and injects the entropy into /dev/random. + +config MMAPPER + tristate "iomem emulation driver" + help + This driver allows a host file to be used as emulated IO memory inside + UML. + +endmenu diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common new file mode 100644 index 0000000..0d207e7 --- /dev/null +++ b/arch/um/Kconfig.common @@ -0,0 +1,77 @@ +config DEFCONFIG_LIST + string + option defconfig_list + default "arch/$ARCH/defconfig" + +# UML uses the generic IRQ subsystem +config GENERIC_HARDIRQS + bool + default y + +config UML + bool + default y + +config MMU + bool + default y + +config NO_IOMEM + def_bool y + +mainmenu "Linux/Usermode Kernel Configuration" + +config ISA + bool + +config SBUS + bool + +config PCI + bool + +config PCMCIA + bool + +# Yet to do! 
+config TRACE_IRQFLAGS_SUPPORT + bool + default n + +config LOCKDEP_SUPPORT + bool + default y + +config STACKTRACE_SUPPORT + bool + default n + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config GENERIC_BUG + bool + default y + depends on BUG + +config GENERIC_TIME + bool + default y + +config GENERIC_CLOCKEVENTS + bool + default y + +# Used in kernel/irq/manage.c and include/linux/irq.h +config IRQ_RELEASE_METHOD + bool + default y + +config HZ + int + default 100 + +config SUBARCH + string + option env="SUBARCH" diff --git a/arch/um/Kconfig.debug b/arch/um/Kconfig.debug new file mode 100644 index 0000000..8fce5e5 --- /dev/null +++ b/arch/um/Kconfig.debug @@ -0,0 +1,40 @@ +menu "Kernel hacking" + +source "lib/Kconfig.debug" + +config GPROF + bool "Enable gprof support" + depends on DEBUG_INFO && FRAME_POINTER + help + This allows profiling of a User-Mode Linux kernel with the gprof + utility. + + See <http://user-mode-linux.sourceforge.net/old/gprof.html> for more + details. + + If you're involved in UML kernel development and want to use gprof, + say Y. If you're unsure, say N. + +config GCOV + bool "Enable gcov support" + depends on DEBUG_INFO + help + This option allows developers to retrieve coverage data from a UML + session. + + See <http://user-mode-linux.sourceforge.net/old/gprof.html> for more + details. + + If you're involved in UML kernel development and want to use gcov, + say Y. If you're unsure, say N. + +config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + default N + help + Track the maximum kernel stack usage - this will look at each + kernel stack at process exit and log it if it's the deepest + stack seen so far. + + This option will slow down process creation and destruction somewhat. 
+endmenu diff --git a/arch/um/Kconfig.net b/arch/um/Kconfig.net new file mode 100644 index 0000000..9e9a4aa --- /dev/null +++ b/arch/um/Kconfig.net @@ -0,0 +1,202 @@ + +menu "UML Network Devices" + depends on NET + +# UML virtual driver +config UML_NET + bool "Virtual network device" + help + While the User-Mode port cannot directly talk to any physical + hardware devices, this choice and the following transport options + provide one or more virtual network devices through which the UML + kernels can talk to each other, the host, and with the host's help, + machines on the outside world. + + For more information, including explanations of the networking and + sample configurations, see + <http://user-mode-linux.sourceforge.net/old/networking.html>. + + If you'd like to be able to enable networking in the User-Mode + linux environment, say Y; otherwise say N. Note that you must + enable at least one of the following transport options to actually + make use of UML networking. + +config UML_NET_ETHERTAP + bool "Ethertap transport" + depends on UML_NET + help + The Ethertap User-Mode Linux network transport allows a single + running UML to exchange packets with its host over one of the + host's Ethertap devices, such as /dev/tap0. Additional running + UMLs can use additional Ethertap devices, one per running UML. + While the UML believes it's on a (multi-device, broadcast) virtual + Ethernet network, it's in fact communicating over a point-to-point + link with the host. + + To use this, your host kernel must have support for Ethertap + devices. Also, if your host kernel is 2.4.x, it must have + CONFIG_NETLINK_DEV configured as Y or M. + + For more information, see + <http://user-mode-linux.sourceforge.net/old/networking.html> That site + has examples of the UML command line to use to enable Ethertap + networking. + + If you'd like to set up an IP network with the host and/or the + outside world, say Y to this, the Daemon Transport and/or the + Slip Transport. 
You'll need at least one of them, but may choose + more than one without conflict. If you don't need UML networking, + say N. + +config UML_NET_TUNTAP + bool "TUN/TAP transport" + depends on UML_NET + help + The UML TUN/TAP network transport allows a UML instance to exchange + packets with the host over a TUN/TAP device. This option will only + work with a 2.4 host, unless you've applied the TUN/TAP patch to + your 2.2 host kernel. + + To use this transport, your host kernel must have support for TUN/TAP + devices, either built-in or as a module. + +config UML_NET_SLIP + bool "SLIP transport" + depends on UML_NET + help + The slip User-Mode Linux network transport allows a running UML to + network with its host over a point-to-point link. Unlike Ethertap, + which can carry any Ethernet frame (and hence even non-IP packets), + the slip transport can only carry IP packets. + + To use this, your host must support slip devices. + + For more information, see + <http://user-mode-linux.sourceforge.net/old/networking.html>. That site + has examples of the UML command line to use to enable slip + networking, and details of a few quirks with it. + + The Ethertap Transport is preferred over slip because of slip's + limitations. If you prefer slip, however, say Y here. Otherwise + choose the Multicast transport (to network multiple UMLs on + multiple hosts), Ethertap (to network with the host and the + outside world), and/or the Daemon transport (to network multiple + UMLs on a single host). You may choose more than one without + conflict. If you don't need UML networking, say N. + +config UML_NET_DAEMON + bool "Daemon transport" + depends on UML_NET + help + This User-Mode Linux network transport allows one or more running + UMLs on a single host to communicate with each other, but not to + the host. + + To use this form of networking, you'll need to run the UML + networking daemon on the host. 
+ + For more information, see + <http://user-mode-linux.sourceforge.net/old/networking.html> That site + has examples of the UML command line to use to enable Daemon + networking. + + If you'd like to set up a network with other UMLs on a single host, + say Y. If you need a network between UMLs on multiple physical + hosts, choose the Multicast Transport. To set up a network with + the host and/or other IP machines, say Y to the Ethertap or Slip + transports. You'll need at least one of them, but may choose + more than one without conflict. If you don't need UML networking, + say N. + +config UML_NET_VDE + bool "VDE transport" + depends on UML_NET + help + This User-Mode Linux network transport allows one or more running + UMLs on a single host to communicate with each other and also + with the rest of the world using Virtual Distributed Ethernet, + an improved fork of uml_switch. + + You must have libvdeplug installed in order to build the vde + transport into UML. + + To use this form of networking, you will need to run vde_switch + on the host. + + For more information, see <http://wiki.virtualsquare.org/> + That site has a good overview of what VDE is and also examples + of the UML command line to use to enable VDE networking. + + If you need UML networking with VDE, + say Y. + +config UML_NET_MCAST + bool "Multicast transport" + depends on UML_NET + help + This Multicast User-Mode Linux network transport allows multiple + UMLs (even ones running on different host machines!) to talk to + each other over a virtual ethernet network. However, it requires + at least one UML with one of the other transports to act as a + bridge if any of them need to be able to talk to their hosts or any + other IP machines. + + To use this, your host kernel(s) must support IP Multicasting. 
+ + For more information, see + <http://user-mode-linux.sourceforge.net/old/networking.html> That site + has examples of the UML command line to use to enable Multicast + networking, and notes about the security of this approach. + + If you need UMLs on multiple physical hosts to communicate as if + they shared an Ethernet network, say Y. If you need to communicate + with other IP machines, make sure you select one of the other + transports (possibly in addition to Multicast; they're not + exclusive). If you don't need to network UMLs say N to each of + the transports. + +config UML_NET_PCAP + bool "pcap transport" + depends on UML_NET && EXPERIMENTAL + help + The pcap transport makes a pcap packet stream on the host look + like an ethernet device inside UML. This is useful for making + UML act as a network monitor for the host. You must have libpcap + installed in order to build the pcap transport into UML. + + For more information, see + <http://user-mode-linux.sourceforge.net/old/networking.html> That site + has examples of the UML command line to use to enable this option. + + If you intend to use UML as a network monitor for the host, say + Y here. Otherwise, say N. + +config UML_NET_SLIRP + bool "SLiRP transport" + depends on UML_NET + help + The SLiRP User-Mode Linux network transport allows a running UML + to network by invoking a program that can handle SLIP encapsulated + packets. This is commonly (but not limited to) the application + known as SLiRP, a program that can re-socket IP packets back onto + the host on which it is run. Only IP packets are supported, + unlike other network transports that can handle all Ethernet + frames. In general, slirp allows the UML the same IP connectivity + to the outside world that the host user is permitted, and unlike + other transports, SLiRP works without the need of root level + privileges, setuid binaries, or SLIP devices on the host. 
This + also means not every type of connection is possible, but most + situations can be accommodated with carefully crafted slirp + commands that can be passed along as part of the network device's + setup string. The effect of this transport on the UML is similar + to that of a host behind a firewall that masquerades all network + connections passing through it (but is less secure). + + To use this you should first have slirp compiled somewhere + accessible on the host, and have read its documentation. If you + don't need UML networking, say N. + + Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp" + +endmenu + diff --git a/arch/um/Kconfig.rest b/arch/um/Kconfig.rest new file mode 100644 index 0000000..7b5cea7 --- /dev/null +++ b/arch/um/Kconfig.rest @@ -0,0 +1,42 @@ +source "init/Kconfig" + +source "kernel/Kconfig.freezer" + +source "drivers/block/Kconfig" + +source "arch/um/Kconfig.char" + +source "drivers/base/Kconfig" + +source "net/Kconfig" + +source "arch/um/Kconfig.net" + +source "drivers/net/Kconfig" + +source "drivers/connector/Kconfig" + +source "fs/Kconfig" + +source "security/Kconfig" + +source "crypto/Kconfig" + +source "lib/Kconfig" + +source "drivers/scsi/Kconfig" + +source "drivers/md/Kconfig" + +if BROKEN + source "drivers/mtd/Kconfig" +endif + +source "drivers/leds/Kconfig" + +#This is just to shut up some Kconfig warnings, so no prompt. +config INPUT + bool + default n + +source "arch/um/Kconfig.debug" diff --git a/arch/um/Kconfig.um b/arch/um/Kconfig.um new file mode 100644 index 0000000..ec2b8da --- /dev/null +++ b/arch/um/Kconfig.um @@ -0,0 +1,149 @@ +config STATIC_LINK + bool "Force a static link" + default n + help + This option gives you the ability to force a static link of UML. + Normally, UML is linked as a shared binary. This is inconvenient for + use in a chroot jail. So, if you intend to run UML inside a chroot, + you probably want to say Y here. 
+ Additionally, this option enables using higher memory spaces (up to + 2.75G) for UML. + +source "mm/Kconfig" +source "kernel/time/Kconfig" + +config LD_SCRIPT_STATIC + bool + default y + depends on STATIC_LINK + +config LD_SCRIPT_DYN + bool + default y + depends on !LD_SCRIPT_STATIC + +source "fs/Kconfig.binfmt" + +config HOSTFS + tristate "Host filesystem" + help + While the User-Mode Linux port uses its own root file system for + booting and normal file access, this module lets the UML user + access files stored on the host. It does not require any + network connection between the Host and UML. An example use of + this might be: + + mount none /tmp/fromhost -t hostfs -o /tmp/umlshare + + where /tmp/fromhost is an empty directory inside UML and + /tmp/umlshare is a directory on the host with files the UML user + wishes to access. + + For more information, see + <http://user-mode-linux.sourceforge.net/hostfs.html>. + + If you'd like to be able to work with files stored on the host, + say Y or M here; otherwise say N. + +config HPPFS + tristate "HoneyPot ProcFS (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc + entries to be overridden, removed, or fabricated from the host. + Its purpose is to allow a UML to appear to be a physical machine + by removing or changing anything in /proc which gives away the + identity of a UML. + + See <http://user-mode-linux.sf.net/old/hppfs.html> for more information. + + You only need this if you are setting up a UML honeypot. Otherwise, + it is safe to say 'N' here. + +config MCONSOLE + bool "Management console" + default y + help + The user mode linux management console is a low-level interface to + the kernel, somewhat like the i386 SysRq interface. Since there is + a full-blown operating system running under every user mode linux + instance, there is much greater flexibility possible than with the + SysRq mechanism. 
+ + If you answer 'Y' to this option, to use this feature, you need the + mconsole client (called uml_mconsole) which is present in CVS in + 2.4.5-9um and later (path /tools/mconsole), and is also in the + distribution RPM package in 2.4.6 and later. + + It is safe to say 'Y' here. + +config MAGIC_SYSRQ + bool "Magic SysRq key" + depends on MCONSOLE + help + If you say Y here, you will have some control over the system even + if the system crashes for example during kernel debugging (e.g., you + will be able to flush the buffer cache to disk, reboot the system + immediately or dump some status information). A key for each of the + possible requests is provided. + + This is the feature normally accomplished by pressing a key + while holding SysRq (Alt+PrintScreen). + + On UML, this is accomplished by sending a "sysrq" command with + mconsole, followed by the letter for the requested command. + + The keys are documented in <file:Documentation/sysrq.txt>. Don't say Y + unless you really know what this hack does. + +config SMP + bool "Symmetric multi-processing support (EXPERIMENTAL)" + default n + depends on BROKEN + help + This option enables UML SMP support. + It is NOT related to having a real SMP box. Not directly, at least. + + UML implements virtual SMP by allowing as many processes to run + simultaneously on the host as there are virtual processors configured. + + Obviously, if the host is a uniprocessor, those processes will + timeshare, but, inside UML, will appear to be running simultaneously. + If the host is a multiprocessor, then UML processes may run + simultaneously, depending on the host scheduler. + + This, however, is supported only in TT mode. So, if you use the SKAS + patch on your host, switching to TT mode and enabling SMP usually + gives you worse performances. + Also, since the support for SMP has been under-developed, there could + be some bugs being exposed by enabling SMP. + + If you don't know what to do, say N. 
+ +config NR_CPUS + int "Maximum number of CPUs (2-32)" + range 2 32 + depends on SMP + default "32" + +config HIGHMEM + bool "Highmem support (EXPERIMENTAL)" + depends on !64BIT && EXPERIMENTAL + default n + help + This was used to allow UML to run with big amounts of memory. + Currently it is unstable, so if unsure say N. + + To use big amounts of memory, it is recommended to enable static + linking (i.e. CONFIG_STATIC_LINK) - this should allow the + guest to use up to 2.75G of memory. + +config KERNEL_STACK_ORDER + int "Kernel stack size order" + default 1 if 64BIT + range 1 10 if 64BIT + default 0 if !64BIT + help + This option determines the size of UML kernel stacks. They will + be 1 << order pages. The default is OK unless you're running Valgrind + on UML, in which case, set this to 3. diff --git a/arch/um/Kconfig.x86 b/arch/um/Kconfig.x86 new file mode 100644 index 0000000..5ee3280 --- /dev/null +++ b/arch/um/Kconfig.x86 @@ -0,0 +1,56 @@ +source "arch/um/Kconfig.common" + +menu "UML-specific options" + +menu "Host processor type and features" + +source "arch/x86/Kconfig.cpu" + +endmenu + +config UML_X86 + def_bool y + +config 64BIT + bool + default SUBARCH = "x86_64" + +config X86_32 + def_bool !64BIT + select HAVE_AOUT + +config RWSEM_XCHGADD_ALGORITHM + def_bool X86_XADD + +config RWSEM_GENERIC_SPINLOCK + def_bool !X86_XADD + +config 3_LEVEL_PGTABLES + bool "Three-level pagetables (EXPERIMENTAL)" if !64BIT + default 64BIT + depends on EXPERIMENTAL + help + Three-level pagetables will let UML have more than 4G of physical + memory. All the memory that can't be mapped directly will be treated + as high memory. + + However, this is experimental on 32-bit architectures, so if unsure say + N (on x86-64 it's automatically enabled, instead, as it's safe there). 
+ +config ARCH_HAS_SC_SIGNALS + def_bool !64BIT + +config ARCH_REUSE_HOST_VSYSCALL_AREA + def_bool !64BIT + +config SMP_BROKEN + def_bool 64BIT + +config GENERIC_HWEIGHT + def_bool y + +source "arch/um/Kconfig.um" + +endmenu + +source "arch/um/Kconfig.rest" diff --git a/arch/um/Makefile b/arch/um/Makefile new file mode 100644 index 0000000..d944c34 --- /dev/null +++ b/arch/um/Makefile @@ -0,0 +1,158 @@ +# +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. +# +# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) +# Licensed under the GPL +# + +ARCH_DIR := arch/um +OS := $(shell uname -s) +# We require bash because the vmlinux link and loader script cpp use bash +# features. +SHELL := /bin/bash + +filechk_gen_header = $< + +core-y += $(ARCH_DIR)/kernel/ \ + $(ARCH_DIR)/drivers/ \ + $(ARCH_DIR)/os-$(OS)/ + +MODE_INCLUDE += -I$(srctree)/$(ARCH_DIR)/include/shared/skas + +include $(srctree)/$(ARCH_DIR)/Makefile-skas + +ARCH_INCLUDE := -I$(srctree)/$(ARCH_DIR)/include/shared +ARCH_INCLUDE += -I$(srctree)/$(ARCH_DIR)/sys-$(SUBARCH)/shared +ifneq ($(KBUILD_SRC),) +ARCH_INCLUDE += -I$(ARCH_DIR)/include/shared # for two generated files +endif +KBUILD_CPPFLAGS += -I$(srctree)/$(ARCH_DIR)/sys-$(SUBARCH) + +# -Dvmap=kernel_vmap prevents anything from referencing the libpcap.o symbol so +# named - it's a common symbol in libpcap, so we get a binary which crashes. +# +# Same things for in6addr_loopback and mktime - found in libc. For these two we +# only get link-time error, luckily. +# +# These apply to USER_CFLAGS too. 
+ +KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \ + $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ + -Din6addr_loopback=kernel_in6addr_loopback \ + -Din6addr_any=kernel_in6addr_any + +KBUILD_AFLAGS += $(ARCH_INCLUDE) + +USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -D__KERNEL__,,\ + $(patsubst -I%,,$(KBUILD_CFLAGS)))) $(ARCH_INCLUDE) $(MODE_INCLUDE) \ + $(filter -I%,$(CFLAGS)) -D_FILE_OFFSET_BITS=64 + +include $(srctree)/$(ARCH_DIR)/Makefile-$(SUBARCH) + +#This will adjust *FLAGS accordingly to the platform. +include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS) + +KBUILD_CPPFLAGS += -I$(srctree)/arch/$(HEADER_ARCH)/include + +# -Derrno=kernel_errno - This turns all kernel references to errno into +# kernel_errno to separate them from the libc errno. This allows -fno-common +# in KBUILD_CFLAGS. Otherwise, it would cause ld to complain about the two different +# errnos. +# These apply to kernelspace only. +# +# strip leading and trailing whitespace to make the USER_CFLAGS removal of these +# defines more robust + +KERNEL_DEFINES = $(strip -Derrno=kernel_errno -Dsigprocmask=kernel_sigprocmask \ + -Dmktime=kernel_mktime $(ARCH_KERNEL_DEFINES)) +KBUILD_CFLAGS += $(KERNEL_DEFINES) + +PHONY += linux + +all: linux + +linux: vmlinux + @echo ' LINK $@' + $(Q)ln -f $< $@ + +define archhelp + echo '* linux - Binary kernel image (./linux) - for backward' + echo ' compatibility only, this creates a hard link to the' + echo ' real kernel binary, the "vmlinux" binary you' + echo ' find in the kernel root.' 
+endef + +KBUILD_KCONFIG := arch/um/Kconfig.$(HEADER_ARCH) + +archprepare: $(ARCH_DIR)/include/shared/user_constants.h +prepare: $(ARCH_DIR)/include/shared/kern_constants.h + +LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static +LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib + +CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \ + $(call cc-option, -fno-stack-protector,) \ + $(call cc-option, -fno-stack-protector-all,) + +CONFIG_KERNEL_STACK_ORDER ?= 2 +STACK_SIZE := $(shell echo $$[ 4096 * (1 << $(CONFIG_KERNEL_STACK_ORDER)) ] ) + +CPPFLAGS_vmlinux.lds = -U$(SUBARCH) -DSTART=$(START) -DELF_ARCH=$(ELF_ARCH) \ + -DELF_FORMAT="$(ELF_FORMAT)" -DKERNEL_STACK_SIZE=$(STACK_SIZE) + +# The wrappers will select whether using "malloc" or the kernel allocator. +LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc + +LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) + +CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) +define cmd_vmlinux__ + $(CC) $(CFLAGS_vmlinux) -o $@ \ + -Wl,-T,$(vmlinux-lds) $(vmlinux-init) \ + -Wl,--start-group $(vmlinux-main) -Wl,--end-group \ + -lutil \ + $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o \ + FORCE ,$^) ; rm -f linux +endef + +# When cleaning we don't include .config, so we don't include +# TT or skas makefiles and don't clean skas_ptregs.h. +CLEAN_FILES += linux x.i gmon.out \ + $(ARCH_DIR)/include/shared/user_constants.h \ + $(ARCH_DIR)/include/shared/kern_constants.h + +archclean: + @find . 
\( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ + -o -name '*.gcov' \) -type f -print | xargs rm -f + +$(objtree)/$(ARCH_DIR)/include/shared: + @echo ' MKDIR $@' + $(Q)mkdir -p $@ + +# Generated files + +$(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s: FORCE + $(Q)$(MAKE) $(build)=$(ARCH_DIR)/sys-$(SUBARCH) $@ + +define filechk_gen-asm-offsets + (set -e; \ + echo "/*"; \ + echo " * DO NOT MODIFY."; \ + echo " *"; \ + echo " * This file was generated by arch/$(ARCH)/Makefile"; \ + echo " *"; \ + echo " */"; \ + echo ""; \ + sed -ne "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"; \ + echo ""; ) +endef + +$(ARCH_DIR)/include/shared/user_constants.h: $(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s + $(call filechk,gen-asm-offsets) + +$(ARCH_DIR)/include/shared/kern_constants.h: $(objtree)/$(ARCH_DIR)/include/shared + @echo ' SYMLINK $@' + $(Q)ln -sf ../../../../include/asm/asm-offsets.h $@ + +export SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING OS HEADER_ARCH DEV_NULL_PATH diff --git a/arch/um/Makefile-i386 b/arch/um/Makefile-i386 new file mode 100644 index 0000000..302cbe5 --- /dev/null +++ b/arch/um/Makefile-i386 @@ -0,0 +1,42 @@ +core-y += arch/um/sys-i386/ arch/x86/crypto/ + +TOP_ADDR := $(CONFIG_TOP_ADDR) + +START := 0x8048000 + +LDFLAGS += -m elf_i386 +ELF_ARCH := $(SUBARCH) +ELF_FORMAT := elf32-$(SUBARCH) +OBJCOPYFLAGS := -O binary -R .note -R .comment -S +HEADER_ARCH := x86 +CHECKFLAGS += -D__i386__ + +ifeq ("$(origin SUBARCH)", "command line") +ifneq ("$(shell uname -m | sed -e s/i.86/i386/)", "$(SUBARCH)") +KBUILD_CFLAGS += $(call cc-option,-m32) +KBUILD_AFLAGS += $(call cc-option,-m32) +LINK-y += $(call cc-option,-m32) +UML_OBJCOPYFLAGS += -F $(ELF_FORMAT) + +export LDFLAGS HOSTCFLAGS HOSTLDFLAGS UML_OBJCOPYFLAGS +endif +endif + +# First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y. +include $(srctree)/arch/x86/Makefile_32.cpu + +# prevent gcc from keeping the stack 16 byte aligned. Taken from i386. 
+cflags-y += $(call cc-option,-mpreferred-stack-boundary=2) + +# Prevent sprintf in nfsd from being converted to strcpy and resulting in +# an unresolved reference. +cflags-y += -ffreestanding + +# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use +# a lot more stack due to the lack of sharing of stacklots. Also, gcc +# 4.3.0 needs -funit-at-a-time for extern inline functions. +KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \ + echo $(call cc-option,-fno-unit-at-a-time); \ + else echo $(call cc-option,-funit-at-a-time); fi ;) + +KBUILD_CFLAGS += $(cflags-y) diff --git a/arch/um/Makefile-ia64 b/arch/um/Makefile-ia64 new file mode 100644 index 0000000..f84dc23 --- /dev/null +++ b/arch/um/Makefile-ia64 @@ -0,0 +1 @@ +START_ADDR = 0x1000000000000000 diff --git a/arch/um/Makefile-os-Linux b/arch/um/Makefile-os-Linux new file mode 100644 index 0000000..2c8a598 --- /dev/null +++ b/arch/um/Makefile-os-Linux @@ -0,0 +1,9 @@ +# +# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) +# Licensed under the GPL +# + +# To get a definition of F_SETSIG +USER_CFLAGS += -D_GNU_SOURCE -D_LARGEFILE64_SOURCE +KBUILD_CFLAGS += -D_LARGEFILE64_SOURCE +DEV_NULL_PATH = \"/dev/null\" diff --git a/arch/um/Makefile-ppc b/arch/um/Makefile-ppc new file mode 100644 index 0000000..66fd200 --- /dev/null +++ b/arch/um/Makefile-ppc @@ -0,0 +1,9 @@ +ifeq ($(CONFIG_HOST_2G_2G), y) +START_ADDR = 0x80000000 +else +START_ADDR = 0xc0000000 +endif +ARCH_CFLAGS = -U__powerpc__ -D__UM_PPC__ + +# The arch is ppc, but the elf32 name is powerpc +ELF_SUBARCH = powerpc diff --git a/arch/um/Makefile-skas b/arch/um/Makefile-skas new file mode 100644 index 0000000..ac35de5 --- /dev/null +++ b/arch/um/Makefile-skas @@ -0,0 +1,12 @@ +# +# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) +# Licensed under the GPL +# + +GPROF_OPT += -pg +GCOV_OPT += -fprofile-arcs -ftest-coverage + +CFLAGS-$(CONFIG_GCOV) += $(GCOV_OPT) +CFLAGS-$(CONFIG_GPROF) += $(GPROF_OPT) +LINK-$(CONFIG_GCOV) += 
$(GCOV_OPT) +LINK-$(CONFIG_GPROF) += $(GPROF_OPT) diff --git a/arch/um/Makefile-x86_64 b/arch/um/Makefile-x86_64 new file mode 100644 index 0000000..a9cd7e7 --- /dev/null +++ b/arch/um/Makefile-x86_64 @@ -0,0 +1,26 @@ +# Copyright 2003 - 2004 Pathscale, Inc +# Released under the GPL + +core-y += arch/um/sys-x86_64/ arch/x86/crypto/ +START := 0x60000000 + +_extra_flags_ = -fno-builtin -m64 + +KBUILD_CFLAGS += $(_extra_flags_) + +CHECKFLAGS += -m64 -D__x86_64__ +KBUILD_AFLAGS += -m64 +LDFLAGS += -m elf_x86_64 +KBUILD_CPPFLAGS += -m64 + +ELF_ARCH := i386:x86-64 +ELF_FORMAT := elf64-x86-64 +HEADER_ARCH := x86 + +# Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example. + +LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64 +LINK-y += -m64 + +# Do unit-at-a-time unconditionally on x86_64, following the host +KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time) diff --git a/arch/um/defconfig b/arch/um/defconfig new file mode 100644 index 0000000..6bd456f --- /dev/null +++ b/arch/um/defconfig @@ -0,0 +1,617 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.24 +# Thu Feb 7 11:48:55 2008 +# +CONFIG_DEFCONFIG_LIST="arch/$ARCH/defconfig" +CONFIG_GENERIC_HARDIRQS=y +CONFIG_UML=y +CONFIG_MMU=y +CONFIG_NO_IOMEM=y +# CONFIG_TRACE_IRQFLAGS_SUPPORT is not set +CONFIG_LOCKDEP_SUPPORT=y +# CONFIG_STACKTRACE_SUPPORT is not set +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_IRQ_RELEASE_METHOD=y +CONFIG_HZ=100 + +# +# UML-specific options +# +# CONFIG_STATIC_LINK is not set + +# +# Host processor type and features +# +# CONFIG_M386 is not set +# CONFIG_M486 is not set +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +CONFIG_M686=y +# CONFIG_MPENTIUMII is not set +# CONFIG_MPENTIUMIII is not set +# CONFIG_MPENTIUMM is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set +# CONFIG_MK8 is not set +# 
CONFIG_MCRUSOE is not set +# CONFIG_MEFFICEON is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MGEODEGX1 is not set +# CONFIG_MGEODE_LX is not set +# CONFIG_MCYRIXIII is not set +# CONFIG_MVIAC3_2 is not set +# CONFIG_MVIAC7 is not set +# CONFIG_MPSC is not set +# CONFIG_MCORE2 is not set +# CONFIG_GENERIC_CPU is not set +# CONFIG_X86_GENERIC is not set +CONFIG_X86_CMPXCHG=y +CONFIG_X86_L1_CACHE_SHIFT=5 +CONFIG_X86_XADD=y +CONFIG_X86_PPRO_FENCE=y +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +CONFIG_X86_GOOD_APIC=y +CONFIG_X86_USE_PPRO_CHECKSUM=y +CONFIG_X86_TSC=y +CONFIG_X86_CMOV=y +CONFIG_X86_MINIMUM_CPU_FAMILY=4 +CONFIG_X86_DEBUGCTLMSR=y +CONFIG_UML_X86=y +CONFIG_X86_32=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +# CONFIG_64BIT is not set +CONFIG_SEMAPHORE_SLEEPERS=y +# CONFIG_3_LEVEL_PGTABLES is not set +CONFIG_ARCH_HAS_SC_SIGNALS=y +CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set +# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_RESOURCES_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_VIRT_TO_BUS=y +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_LD_SCRIPT_DYN=y +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_AOUT is not set +CONFIG_BINFMT_MISC=m +CONFIG_HOSTFS=y +# CONFIG_HPPFS is not set +CONFIG_MCONSOLE=y +CONFIG_MAGIC_SYSRQ=y +# CONFIG_HIGHMEM is not set +CONFIG_KERNEL_STACK_ORDER=0 + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=128 +CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +CONFIG_POSIX_MQUEUE=y 
+CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +# CONFIG_TASKSTATS is not set +# CONFIG_USER_NS is not set +# CONFIG_PID_NS is not set +# CONFIG_AUDIT is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_CGROUPS is not set +CONFIG_FAIR_GROUP_SCHED=y +CONFIG_FAIR_USER_SCHED=y +# CONFIG_FAIR_CGROUP_SCHED is not set +CONFIG_SYSFS_DEPRECATED=y +# CONFIG_RELAY is not set +# CONFIG_BLK_DEV_INITRD is not set +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +# CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set +CONFIG_KALLSYMS_EXTRA_PASS=y +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_ANON_INODES=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +# CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set +# CONFIG_HAVE_OPROFILE is not set +# CONFIG_HAVE_KPROBES is not set +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_SLABINFO=y +CONFIG_RT_MUTEXES=y +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_KMOD=y +CONFIG_BLOCK=y +# CONFIG_LBD is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_LSF is not set +# CONFIG_BLK_DEV_BSG is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" +CONFIG_CLASSIC_RCU=y +# CONFIG_PREEMPT_RCU is not set +CONFIG_BLK_DEV=y +CONFIG_BLK_DEV_UBD=y +# CONFIG_BLK_DEV_UBD_SYNC is not set +CONFIG_BLK_DEV_COW_COMMON=y 
+CONFIG_BLK_DEV_LOOP=m +# CONFIG_BLK_DEV_CRYPTOLOOP is not set +CONFIG_BLK_DEV_NBD=m +# CONFIG_BLK_DEV_RAM is not set +# CONFIG_ATA_OVER_ETH is not set + +# +# Character Devices +# +CONFIG_STDERR_CONSOLE=y +CONFIG_STDIO_CONSOLE=y +CONFIG_SSL=y +CONFIG_NULL_CHAN=y +CONFIG_PORT_CHAN=y +CONFIG_PTY_CHAN=y +CONFIG_TTY_CHAN=y +CONFIG_XTERM_CHAN=y +# CONFIG_NOCONFIG_CHAN is not set +CONFIG_CON_ZERO_CHAN="fd:0,fd:1" +CONFIG_CON_CHAN="xterm" +CONFIG_SSL_CHAN="pts" +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +# CONFIG_RAW_DRIVER is not set +CONFIG_LEGACY_PTY_COUNT=32 +# CONFIG_WATCHDOG is not set +CONFIG_UML_SOUND=m +CONFIG_SOUND=m +CONFIG_HOSTAUDIO=m +# CONFIG_HW_RANDOM is not set +CONFIG_UML_RANDOM=y +# CONFIG_MMAPPER is not set + +# +# Generic Driver Options +# +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set + +# +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set +# CONFIG_NET_KEY is not set +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INET_XFRM_MODE_BEET=y +# CONFIG_INET_LRO is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# 
CONFIG_TCP_MD5SIG is not set +# CONFIG_IPV6 is not set +# CONFIG_INET6_XFRM_TUNNEL is not set +# CONFIG_INET6_TUNNEL is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set + +# +# Wireless +# +# CONFIG_CFG80211 is not set +# CONFIG_WIRELESS_EXT is not set +# CONFIG_MAC80211 is not set +# CONFIG_IEEE80211 is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# UML Network Devices +# +CONFIG_UML_NET=y +CONFIG_UML_NET_ETHERTAP=y +CONFIG_UML_NET_TUNTAP=y +CONFIG_UML_NET_SLIP=y +CONFIG_UML_NET_DAEMON=y +# CONFIG_UML_NET_VDE is not set +CONFIG_UML_NET_MCAST=y +# CONFIG_UML_NET_PCAP is not set +CONFIG_UML_NET_SLIRP=y +CONFIG_NETDEVICES=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set +CONFIG_DUMMY=m +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=m +# CONFIG_VETH is not set + +# +# Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set +# CONFIG_WAN is not set +CONFIG_PPP=m +# CONFIG_PPP_MULTILINK is not set +# CONFIG_PPP_FILTER is not set +# CONFIG_PPP_ASYNC is not set +# CONFIG_PPP_SYNC_TTY is not set +# CONFIG_PPP_DEFLATE is not set +# CONFIG_PPP_BSDCOMP is not set +# CONFIG_PPP_MPPE is not set +# CONFIG_PPPOE is not set +# CONFIG_PPPOL2TP is not set +CONFIG_SLIP=m +# CONFIG_SLIP_COMPRESSED is not set +CONFIG_SLHC=m +# CONFIG_SLIP_SMART is not set +# 
CONFIG_SLIP_MODE_SLIP6 is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_CONNECTOR is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +# CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set +CONFIG_EXT3_FS=y +# CONFIG_EXT3_FS_XATTR is not set +# CONFIG_EXT4DEV_FS is not set +CONFIG_JBD=y +CONFIG_REISERFS_FS=y +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +# CONFIG_REISERFS_FS_XATTR is not set +# CONFIG_JFS_FS is not set +# CONFIG_FS_POSIX_ACL is not set +# CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y +CONFIG_QUOTA=y +# CONFIG_QUOTA_NETLINK_INTERFACE is not set +CONFIG_PRINT_QUOTA_WARNING=y +# CONFIG_QFMT_V1 is not set +# CONFIG_QFMT_V2 is not set +CONFIG_QUOTACTL=y +CONFIG_DNOTIFY=y +CONFIG_AUTOFS_FS=m +CONFIG_AUTOFS4_FS=m +# CONFIG_FUSE_FS is not set + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +# CONFIG_ZISOFS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set +# CONFIG_HUGETLB_PAGE is not set +# CONFIG_CONFIGFS_FS is not set + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set +CONFIG_NETWORK_FILESYSTEMS=y +# CONFIG_NFS_FS is not set +# CONFIG_NFSD is not set +# 
CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +# CONFIG_NLS_CODEPAGE_437 is not set +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ASCII is not set +# CONFIG_NLS_ISO8859_1 is not set +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_UTF8 is not set +# CONFIG_DLM is not set + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set +CONFIG_CRYPTO=y +# CONFIG_CRYPTO_SEQIV is not set +# CONFIG_CRYPTO_MANAGER is not set +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_XCBC is not set +# CONFIG_CRYPTO_NULL is not 
set +# CONFIG_CRYPTO_MD4 is not set +# CONFIG_CRYPTO_MD5 is not set +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_WP512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_ECB is not set +# CONFIG_CRYPTO_CBC is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_XTS is not set +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_CRYPTD is not set +# CONFIG_CRYPTO_DES is not set +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_TWOFISH_586 is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_AES_586 is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SALSA20_586 is not set +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_TEST is not set +# CONFIG_CRYPTO_AUTHENC is not set +# CONFIG_CRYPTO_LZO is not set +CONFIG_CRYPTO_HW=y + +# +# Library routines +# +CONFIG_BITREVERSE=m +# CONFIG_CRC_CCITT is not set +# CONFIG_CRC16 is not set +# CONFIG_CRC_ITU_T is not set +CONFIG_CRC32=m +# CONFIG_CRC7 is not set +# CONFIG_LIBCRC32C is not set +CONFIG_PLIST=y +CONFIG_HAS_DMA=y + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +# CONFIG_SCSI is not set +# CONFIG_SCSI_DMA is not set +# CONFIG_SCSI_NETLINK is not set +# CONFIG_MD is not set +# CONFIG_INPUT is not set + +# +# Kernel hacking +# +# CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_WARN_DEPRECATED=y 
+CONFIG_ENABLE_MUST_CHECK=y +# CONFIG_UNUSED_SYMBOLS is not set +# CONFIG_DEBUG_FS is not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set +CONFIG_DETECT_SOFTLOCKUP=y +CONFIG_SCHED_DEBUG=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_TIMER_STATS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_DEBUG_KOBJECT is not set +CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_INFO=y +# CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_LIST is not set +# CONFIG_DEBUG_SG is not set +CONFIG_FRAME_POINTER=y +CONFIG_FORCED_INLINING=y +# CONFIG_BOOT_PRINTK_DELAY is not set +# CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_FAULT_INJECTION is not set +# CONFIG_SAMPLES is not set +# CONFIG_GPROF is not set +# CONFIG_GCOV is not set +# CONFIG_DEBUG_STACK_USAGE is not set diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile new file mode 100644 index 0000000..1d9b6ae --- /dev/null +++ b/arch/um/drivers/Makefile @@ -0,0 +1,67 @@ +# +# Copyright (C) 2000, 2002, 2003 Jeff Dike (jdike@karaya.com) +# Licensed under the GPL +# + +# pcap is broken in 2.5 because kbuild doesn't allow pcap.a to be linked +# in to pcap.o + +slip-objs := slip_kern.o slip_user.o +slirp-objs := slirp_kern.o slirp_user.o +daemon-objs := daemon_kern.o daemon_user.o +mcast-objs := mcast_kern.o mcast_user.o +net-objs := net_kern.o net_user.o +mconsole-objs := mconsole_kern.o mconsole_user.o +hostaudio-objs := hostaudio_kern.o +ubd-objs := ubd_kern.o ubd_user.o +port-objs := port_kern.o port_user.o +harddog-objs := harddog_kern.o harddog_user.o + +LDFLAGS_pcap.o := -r $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libpcap.a) + +LDFLAGS_vde.o := -r $(shell $(CC) $(CFLAGS) -print-file-name=libvdeplug.a) + +targets := pcap_kern.o pcap_user.o 
vde_kern.o vde_user.o + +$(obj)/pcap.o: $(obj)/pcap_kern.o $(obj)/pcap_user.o + $(LD) -r -dp -o $@ $^ $(LDFLAGS) $(LDFLAGS_pcap.o) + +$(obj)/vde.o: $(obj)/vde_kern.o $(obj)/vde_user.o + $(LD) -r -dp -o $@ $^ $(LDFLAGS) $(LDFLAGS_vde.o) + +#XXX: The call below does not work because the flags are added before the +# object name, so nothing from the library gets linked. +#$(call if_changed,ld) + +# When the above is fixed, don't forget to add this too! +#targets += $(obj)/pcap.o + +obj-y := stdio_console.o fd.o chan_kern.o chan_user.o line.o +obj-$(CONFIG_SSL) += ssl.o +obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o + +obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o +obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o +obj-$(CONFIG_UML_NET_DAEMON) += daemon.o +obj-$(CONFIG_UML_NET_VDE) += vde.o +obj-$(CONFIG_UML_NET_MCAST) += mcast.o +obj-$(CONFIG_UML_NET_PCAP) += pcap.o +obj-$(CONFIG_UML_NET) += net.o +obj-$(CONFIG_MCONSOLE) += mconsole.o +obj-$(CONFIG_MMAPPER) += mmapper_kern.o +obj-$(CONFIG_BLK_DEV_UBD) += ubd.o +obj-$(CONFIG_HOSTAUDIO) += hostaudio.o +obj-$(CONFIG_NULL_CHAN) += null.o +obj-$(CONFIG_PORT_CHAN) += port.o +obj-$(CONFIG_PTY_CHAN) += pty.o +obj-$(CONFIG_TTY_CHAN) += tty.o +obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o +obj-$(CONFIG_UML_WATCHDOG) += harddog.o +obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o +obj-$(CONFIG_UML_RANDOM) += random.o + +# pcap_user.o must be added explicitly. 
+USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o +CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH) + +include arch/um/scripts/Makefile.rules diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c new file mode 100644 index 0000000..6e51424 --- /dev/null +++ b/arch/um/drivers/chan_kern.c @@ -0,0 +1,623 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) + * Licensed under the GPL + */ + +#include <linux/slab.h> +#include <linux/tty.h> +#include <linux/tty_flip.h> +#include "chan_kern.h" +#include "os.h" + +#ifdef CONFIG_NOCONFIG_CHAN +static void *not_configged_init(char *str, int device, + const struct chan_opts *opts) +{ + printk(KERN_ERR "Using a channel type which is configured out of " + "UML\n"); + return NULL; +} + +static int not_configged_open(int input, int output, int primary, void *data, + char **dev_out) +{ + printk(KERN_ERR "Using a channel type which is configured out of " + "UML\n"); + return -ENODEV; +} + +static void not_configged_close(int fd, void *data) +{ + printk(KERN_ERR "Using a channel type which is configured out of " + "UML\n"); +} + +static int not_configged_read(int fd, char *c_out, void *data) +{ + printk(KERN_ERR "Using a channel type which is configured out of " + "UML\n"); + return -EIO; +} + +static int not_configged_write(int fd, const char *buf, int len, void *data) +{ + printk(KERN_ERR "Using a channel type which is configured out of " + "UML\n"); + return -EIO; +} + +static int not_configged_console_write(int fd, const char *buf, int len) +{ + printk(KERN_ERR "Using a channel type which is configured out of " + "UML\n"); + return -EIO; +} + +static int not_configged_window_size(int fd, void *data, unsigned short *rows, + unsigned short *cols) +{ + printk(KERN_ERR "Using a channel type which is configured out of " + "UML\n"); + return -ENODEV; +} + +static void not_configged_free(void *data) +{ + printk(KERN_ERR "Using a channel type which is configured 
out of " + "UML\n"); +} + +static const struct chan_ops not_configged_ops = { + .init = not_configged_init, + .open = not_configged_open, + .close = not_configged_close, + .read = not_configged_read, + .write = not_configged_write, + .console_write = not_configged_console_write, + .window_size = not_configged_window_size, + .free = not_configged_free, + .winch = 0, +}; +#endif /* CONFIG_NOCONFIG_CHAN */ + +static void tty_receive_char(struct tty_struct *tty, char ch) +{ + if (tty == NULL) + return; + + if (I_IXON(tty) && !I_IXOFF(tty) && !tty->raw) { + if (ch == STOP_CHAR(tty)) { + stop_tty(tty); + return; + } + else if (ch == START_CHAR(tty)) { + start_tty(tty); + return; + } + } + + tty_insert_flip_char(tty, ch, TTY_NORMAL); +} + +static int open_one_chan(struct chan *chan) +{ + int fd, err; + + if (chan->opened) + return 0; + + if (chan->ops->open == NULL) + fd = 0; + else fd = (*chan->ops->open)(chan->input, chan->output, chan->primary, + chan->data, &chan->dev); + if (fd < 0) + return fd; + + err = os_set_fd_block(fd, 0); + if (err) { + (*chan->ops->close)(fd, chan->data); + return err; + } + + chan->fd = fd; + + chan->opened = 1; + return 0; +} + +static int open_chan(struct list_head *chans) +{ + struct list_head *ele; + struct chan *chan; + int ret, err = 0; + + list_for_each(ele, chans) { + chan = list_entry(ele, struct chan, list); + ret = open_one_chan(chan); + if (chan->primary) + err = ret; + } + return err; +} + +void chan_enable_winch(struct list_head *chans, struct tty_struct *tty) +{ + struct list_head *ele; + struct chan *chan; + + list_for_each(ele, chans) { + chan = list_entry(ele, struct chan, list); + if (chan->primary && chan->output && chan->ops->winch) { + register_winch(chan->fd, tty); + return; + } + } +} + +int enable_chan(struct line *line) +{ + struct list_head *ele; + struct chan *chan; + int err; + + list_for_each(ele, &line->chan_list) { + chan = list_entry(ele, struct chan, list); + err = open_one_chan(chan); + if (err) { + if 
(chan->primary) + goto out_close; + + continue; + } + + if (chan->enabled) + continue; + err = line_setup_irq(chan->fd, chan->input, chan->output, line, + chan); + if (err) + goto out_close; + + chan->enabled = 1; + } + + return 0; + + out_close: + close_chan(&line->chan_list, 0); + return err; +} + +/* Items are added in IRQ context, when free_irq can't be called, and + * removed in process context, when it can. + * This handles interrupt sources which disappear, and which need to + * be permanently disabled. This is discovered in IRQ context, but + * the freeing of the IRQ must be done later. + */ +static DEFINE_SPINLOCK(irqs_to_free_lock); +static LIST_HEAD(irqs_to_free); + +void free_irqs(void) +{ + struct chan *chan; + LIST_HEAD(list); + struct list_head *ele; + unsigned long flags; + + spin_lock_irqsave(&irqs_to_free_lock, flags); + list_splice_init(&irqs_to_free, &list); + spin_unlock_irqrestore(&irqs_to_free_lock, flags); + + list_for_each(ele, &list) { + chan = list_entry(ele, struct chan, free_list); + + if (chan->input) + free_irq(chan->line->driver->read_irq, chan); + if (chan->output) + free_irq(chan->line->driver->write_irq, chan); + chan->enabled = 0; + } +} + +static void close_one_chan(struct chan *chan, int delay_free_irq) +{ + unsigned long flags; + + if (!chan->opened) + return; + + if (delay_free_irq) { + spin_lock_irqsave(&irqs_to_free_lock, flags); + list_add(&chan->free_list, &irqs_to_free); + spin_unlock_irqrestore(&irqs_to_free_lock, flags); + } + else { + if (chan->input) + free_irq(chan->line->driver->read_irq, chan); + if (chan->output) + free_irq(chan->line->driver->write_irq, chan); + chan->enabled = 0; + } + if (chan->ops->close != NULL) + (*chan->ops->close)(chan->fd, chan->data); + + chan->opened = 0; + chan->fd = -1; +} + +void close_chan(struct list_head *chans, int delay_free_irq) +{ + struct chan *chan; + + /* Close in reverse order as open in case more than one of them + * refers to the same device and they save and restore 
that device's + * state. Then, the first one opened will have the original state, + * so it must be the last closed. + */ + list_for_each_entry_reverse(chan, chans, list) { + close_one_chan(chan, delay_free_irq); + } +} + +void deactivate_chan(struct list_head *chans, int irq) +{ + struct list_head *ele; + + struct chan *chan; + list_for_each(ele, chans) { + chan = list_entry(ele, struct chan, list); + + if (chan->enabled && chan->input) + deactivate_fd(chan->fd, irq); + } +} + +void reactivate_chan(struct list_head *chans, int irq) +{ + struct list_head *ele; + struct chan *chan; + + list_for_each(ele, chans) { + chan = list_entry(ele, struct chan, list); + + if (chan->enabled && chan->input) + reactivate_fd(chan->fd, irq); + } +} + +int write_chan(struct list_head *chans, const char *buf, int len, + int write_irq) +{ + struct list_head *ele; + struct chan *chan = NULL; + int n, ret = 0; + + if (len == 0) + return 0; + + list_for_each(ele, chans) { + chan = list_entry(ele, struct chan, list); + if (!chan->output || (chan->ops->write == NULL)) + continue; + + n = chan->ops->write(chan->fd, buf, len, chan->data); + if (chan->primary) { + ret = n; + if ((ret == -EAGAIN) || ((ret >= 0) && (ret < len))) + reactivate_fd(chan->fd, write_irq); + } + } + return ret; +} + +int console_write_chan(struct list_head *chans, const char *buf, int len) +{ + struct list_head *ele; + struct chan *chan; + int n, ret = 0; + + list_for_each(ele, chans) { + chan = list_entry(ele, struct chan, list); + if (!chan->output || (chan->ops->console_write == NULL)) + continue; + + n = chan->ops->console_write(chan->fd, buf, len); + if (chan->primary) + ret = n; + } + return ret; +} + +int console_open_chan(struct line *line, struct console *co) +{ + int err; + + err = open_chan(&line->chan_list); + if (err) + return err; + + printk(KERN_INFO "Console initialized on /dev/%s%d\n", co->name, + co->index); + return 0; +} + +int chan_window_size(struct list_head *chans, unsigned short *rows_out, + 
unsigned short *cols_out) +{ + struct list_head *ele; + struct chan *chan; + + list_for_each(ele, chans) { + chan = list_entry(ele, struct chan, list); + if (chan->primary) { + if (chan->ops->window_size == NULL) + return 0; + return chan->ops->window_size(chan->fd, chan->data, + rows_out, cols_out); + } + } + return 0; +} + +static void free_one_chan(struct chan *chan, int delay_free_irq) +{ + list_del(&chan->list); + + close_one_chan(chan, delay_free_irq); + + if (chan->ops->free != NULL) + (*chan->ops->free)(chan->data); + + if (chan->primary && chan->output) + ignore_sigio_fd(chan->fd); + kfree(chan); +} + +static void free_chan(struct list_head *chans, int delay_free_irq) +{ + struct list_head *ele, *next; + struct chan *chan; + + list_for_each_safe(ele, next, chans) { + chan = list_entry(ele, struct chan, list); + free_one_chan(chan, delay_free_irq); + } +} + +static int one_chan_config_string(struct chan *chan, char *str, int size, + char **error_out) +{ + int n = 0; + + if (chan == NULL) { + CONFIG_CHUNK(str, size, n, "none", 1); + return n; + } + + CONFIG_CHUNK(str, size, n, chan->ops->type, 0); + + if (chan->dev == NULL) { + CONFIG_CHUNK(str, size, n, "", 1); + return n; + } + + CONFIG_CHUNK(str, size, n, ":", 0); + CONFIG_CHUNK(str, size, n, chan->dev, 0); + + return n; +} + +static int chan_pair_config_string(struct chan *in, struct chan *out, + char *str, int size, char **error_out) +{ + int n; + + n = one_chan_config_string(in, str, size, error_out); + str += n; + size -= n; + + if (in == out) { + CONFIG_CHUNK(str, size, n, "", 1); + return n; + } + + CONFIG_CHUNK(str, size, n, ",", 1); + n = one_chan_config_string(out, str, size, error_out); + str += n; + size -= n; + CONFIG_CHUNK(str, size, n, "", 1); + + return n; +} + +int chan_config_string(struct list_head *chans, char *str, int size, + char **error_out) +{ + struct list_head *ele; + struct chan *chan, *in = NULL, *out = NULL; + + list_for_each(ele, chans) { + chan = list_entry(ele, struct chan, 
list); + if (!chan->primary) + continue; + if (chan->input) + in = chan; + if (chan->output) + out = chan; + } + + return chan_pair_config_string(in, out, str, size, error_out); +} + +struct chan_type { + char *key; + const struct chan_ops *ops; +}; + +/* + * Maps each channel-type prefix to its backend ops. A type whose + * CONFIG_*_CHAN option is not set maps to not_configged_ops instead. + */ +static const struct chan_type chan_table[] = { + { "fd", &fd_ops }, + +#ifdef CONFIG_NULL_CHAN + { "null", &null_ops }, +#else + { "null", &not_configged_ops }, +#endif + +#ifdef CONFIG_PORT_CHAN + { "port", &port_ops }, +#else + { "port", &not_configged_ops }, +#endif + +#ifdef CONFIG_PTY_CHAN + { "pty", &pty_ops }, + { "pts", &pts_ops }, +#else + { "pty", &not_configged_ops }, + { "pts", &not_configged_ops }, +#endif + +#ifdef CONFIG_TTY_CHAN + { "tty", &tty_ops }, +#else + { "tty", &not_configged_ops }, +#endif + +#ifdef CONFIG_XTERM_CHAN + { "xterm", &xterm_ops }, +#else + { "xterm", &not_configged_ops }, +#endif +}; + +/* + * Build one channel from a specification string. The leading characters of + * str are matched against the chan_table keys and the remainder of the string + * is handed to that backend's init hook. On failure NULL is returned and + * *error_out points at a static message. The new channel starts out primary, + * unopened and disabled, with fd -1 and neither input nor output set. + */ +static struct chan *parse_chan(struct line *line, char *str, int device, + const struct chan_opts *opts, char **error_out) +{ + const struct chan_type *entry; + const struct chan_ops *ops; + struct chan *chan; + void *data; + int i; + + ops = NULL; + data = NULL; + for(i = 0; i < ARRAY_SIZE(chan_table); i++) { + entry = &chan_table[i]; + if (!strncmp(str, entry->key, strlen(entry->key))) { + ops = entry->ops; + str += strlen(entry->key); + break; + } + } + if (ops == NULL) { + *error_out = "No match for configured backends"; + return NULL; + } + + data = (*ops->init)(str, device, opts); + if (data == NULL) { + *error_out = "Configuration failed"; + return NULL; + } + + chan = kmalloc(sizeof(*chan), GFP_ATOMIC); + if (chan == NULL) { + /* data came from ops->init; release it the way free_one_chan does */ + if (ops->free != NULL) + (*ops->free)(data); + *error_out = "Memory allocation failed"; + return NULL; + } + *chan = ((struct chan) { .list = LIST_HEAD_INIT(chan->list), + .free_list = + LIST_HEAD_INIT(chan->free_list), + .line = line, + .primary = 1, + .input = 0, + .output = 0, + .opened = 0, + .enabled = 0, + .fd = -1, + .ops = ops, + .data = data }); + return chan; +} + +int parse_chan_pair(char *str, struct line *line, int
device, + const struct chan_opts *opts, char **error_out) +{ + struct list_head *chans = &line->chan_list; + struct chan *new, *chan; + char *in, *out; + + if (!list_empty(chans)) { + chan = list_entry(chans->next, struct chan, list); + free_chan(chans, 0); + INIT_LIST_HEAD(chans); + } + + out = strchr(str, ','); + if (out != NULL) { + in = str; + *out = '\0'; + out++; + new = parse_chan(line, in, device, opts, error_out); + if (new == NULL) + return -1; + + new->input = 1; + list_add(&new->list, chans); + + new = parse_chan(line, out, device, opts, error_out); + if (new == NULL) + return -1; + + list_add(&new->list, chans); + new->output = 1; + } + else { + new = parse_chan(line, str, device, opts, error_out); + if (new == NULL) + return -1; + + list_add(&new->list, chans); + new->input = 1; + new->output = 1; + } + return 0; +} + +void chan_interrupt(struct list_head *chans, struct delayed_work *task, + struct tty_struct *tty, int irq) +{ + struct list_head *ele, *next; + struct chan *chan; + int err; + char c; + + list_for_each_safe(ele, next, chans) { + chan = list_entry(ele, struct chan, list); + if (!chan->input || (chan->ops->read == NULL)) + continue; + do { + if (tty && !tty_buffer_request_room(tty, 1)) { + schedule_delayed_work(task, 1); + goto out; + } + err = chan->ops->read(chan->fd, &c, chan->data); + if (err > 0) + tty_receive_char(tty, c); + } while (err > 0); + + if (err == 0) + reactivate_fd(chan->fd, irq); + if (err == -EIO) { + if (chan->primary) { + if (tty != NULL) + tty_hangup(tty); + close_chan(chans, 1); + return; + } + else close_one_chan(chan, 1); + } + } + out: + if (tty) + tty_flip_buffer_push(tty); +} diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c new file mode 100644 index 0000000..cfeb3f4 --- /dev/null +++ b/arch/um/drivers/chan_user.c @@ -0,0 +1,298 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) + * Licensed under the GPL + */ + +#include <stdlib.h> +#include <unistd.h> 
+#include <errno.h> +#include <sched.h> +#include <signal.h> +#include <termios.h> +#include <sys/ioctl.h> +#include "chan_user.h" +#include "kern_constants.h" +#include "os.h" +#include "um_malloc.h" +#include "user.h" + +void generic_close(int fd, void *unused) +{ + close(fd); +} + +int generic_read(int fd, char *c_out, void *unused) +{ + int n; + + n = read(fd, c_out, sizeof(*c_out)); + if (n > 0) + return n; + else if (errno == EAGAIN) + return 0; + else if (n == 0) + return -EIO; + return -errno; +} + +/* XXX Trivial wrapper around write */ + +int generic_write(int fd, const char *buf, int n, void *unused) +{ + int err; + + err = write(fd, buf, n); + if (err > 0) + return err; + else if (errno == EAGAIN) + return 0; + else if (err == 0) + return -EIO; + return -errno; +} + +int generic_window_size(int fd, void *unused, unsigned short *rows_out, + unsigned short *cols_out) +{ + struct winsize size; + int ret; + + if (ioctl(fd, TIOCGWINSZ, &size) < 0) + return -errno; + + ret = ((*rows_out != size.ws_row) || (*cols_out != size.ws_col)); + + *rows_out = size.ws_row; + *cols_out = size.ws_col; + + return ret; +} + +void generic_free(void *data) +{ + kfree(data); +} + +int generic_console_write(int fd, const char *buf, int n) +{ + sigset_t old, no_sigio; + struct termios save, new; + int err; + + if (isatty(fd)) { + sigemptyset(&no_sigio); + sigaddset(&no_sigio, SIGIO); + if (sigprocmask(SIG_BLOCK, &no_sigio, &old)) + goto error; + + CATCH_EINTR(err = tcgetattr(fd, &save)); + if (err) + goto error; + new = save; + /* + * The terminal becomes a bit less raw, to handle \n also as + * "Carriage Return", not only as "New Line". Otherwise, the new + * line won't start at the first column. + */ + new.c_oflag |= OPOST; + CATCH_EINTR(err = tcsetattr(fd, TCSAFLUSH, &new)); + if (err) + goto error; + } + err = generic_write(fd, buf, n, NULL); + /* + * Restore raw mode, in any case; we *must* ignore any error apart + * EINTR, except for debug. 
+ */ + if (isatty(fd)) { + CATCH_EINTR(tcsetattr(fd, TCSAFLUSH, &save)); + sigprocmask(SIG_SETMASK, &old, NULL); + } + + return err; +error: + return -errno; +} + +/* + * UML SIGWINCH handling + * + * The point of this is to handle SIGWINCH on consoles which have host + * ttys and relay them inside UML to whatever might be running on the + * console and cares about the window size (since SIGWINCH notifies + * about terminal size changes). + * + * So, we have a separate thread for each host tty attached to a UML + * device (side-issue - I'm annoyed that one thread can't have + * multiple controlling ttys for the purpose of handling SIGWINCH, but + * I imagine there are other reasons that doesn't make any sense). + * + * SIGWINCH can't be received synchronously, so you have to set up to + * receive it as a signal. That being the case, if you are going to + * wait for it, it is convenient to sit in sigsuspend() and wait for + * the signal to bounce you out of it (see below for how we make sure + * to exit only on SIGWINCH). + */ + +static void winch_handler(int sig) +{ +} + +struct winch_data { + int pty_fd; + int pipe_fd; +}; + +static int winch_thread(void *arg) +{ + struct winch_data *data = arg; + sigset_t sigs; + int pty_fd, pipe_fd; + int count; + char c = 1; + + pty_fd = data->pty_fd; + pipe_fd = data->pipe_fd; + count = write(pipe_fd, &c, sizeof(c)); + if (count != sizeof(c)) + printk(UM_KERN_ERR "winch_thread : failed to write " + "synchronization byte, err = %d\n", -count); + + /* + * We are not using SIG_IGN on purpose, so don't fix it as I thought to + * do! If using SIG_IGN, the sigsuspend() call below would not stop on + * SIGWINCH. + */ + + signal(SIGWINCH, winch_handler); + sigfillset(&sigs); + /* Block all signals possible. */ + if (sigprocmask(SIG_SETMASK, &sigs, NULL) < 0) { + printk(UM_KERN_ERR "winch_thread : sigprocmask failed, " + "errno = %d\n", errno); + exit(1); + } + /* In sigsuspend(), block anything else than SIGWINCH. 
*/ + sigdelset(&sigs, SIGWINCH); + + if (setsid() < 0) { + printk(UM_KERN_ERR "winch_thread : setsid failed, errno = %d\n", + errno); + exit(1); + } + + if (ioctl(pty_fd, TIOCSCTTY, 0) < 0) { + printk(UM_KERN_ERR "winch_thread : TIOCSCTTY failed on " + "fd %d err = %d\n", pty_fd, errno); + exit(1); + } + + if (tcsetpgrp(pty_fd, os_getpid()) < 0) { + printk(UM_KERN_ERR "winch_thread : tcsetpgrp failed on " + "fd %d err = %d\n", pty_fd, errno); + exit(1); + } + + /* + * These are synchronization calls between various UML threads on the + * host - since they are not different kernel threads, we cannot use + * kernel semaphores. We don't use SysV semaphores because they are + * persistent. + */ + count = read(pipe_fd, &c, sizeof(c)); + if (count != sizeof(c)) + printk(UM_KERN_ERR "winch_thread : failed to read " + "synchronization byte, err = %d\n", errno); + + while(1) { + /* + * This will be interrupted by SIGWINCH only, since + * other signals are blocked. + */ + sigsuspend(&sigs); + + count = write(pipe_fd, &c, sizeof(c)); + if (count != sizeof(c)) + printk(UM_KERN_ERR "winch_thread : write failed, " + "err = %d\n", errno); + } +} + +static int winch_tramp(int fd, struct tty_struct *tty, int *fd_out, + unsigned long *stack_out) +{ + struct winch_data data; + int fds[2], n, err; + char c; + + err = os_pipe(fds, 1, 1); + if (err < 0) { + printk(UM_KERN_ERR "winch_tramp : os_pipe failed, err = %d\n", + -err); + goto out; + } + + data = ((struct winch_data) { .pty_fd = fd, + .pipe_fd = fds[1] } ); + /* + * CLONE_FILES so this thread doesn't hold open files which are open + * now, but later closed in a different thread. This is a + * problem with /dev/net/tun, which if held open by this + * thread, prevents the TUN/TAP device from being reused. 
+ */ + err = run_helper_thread(winch_thread, &data, CLONE_FILES, stack_out); + if (err < 0) { + printk(UM_KERN_ERR "fork of winch_thread failed - errno = %d\n", + -err); + goto out_close; + } + + *fd_out = fds[0]; + n = read(fds[0], &c, sizeof(c)); + if (n != sizeof(c)) { + printk(UM_KERN_ERR "winch_tramp : failed to read " + "synchronization byte\n"); + printk(UM_KERN_ERR "read failed, err = %d\n", errno); + printk(UM_KERN_ERR "fd %d will not support SIGWINCH\n", fd); + err = -EINVAL; + goto out_close; + } + + if (os_set_fd_block(*fd_out, 0)) { + printk(UM_KERN_ERR "winch_tramp: failed to set thread_fd " + "non-blocking.\n"); + goto out_close; + } + + return err; + + out_close: + close(fds[1]); + close(fds[0]); + out: + return err; +} + +void register_winch(int fd, struct tty_struct *tty) +{ + unsigned long stack; + int pid, thread, count, thread_fd = -1; + char c = 1; + + if (!isatty(fd)) + return; + + pid = tcgetpgrp(fd); + if (!is_skas_winch(pid, fd, tty) && (pid == -1)) { + thread = winch_tramp(fd, tty, &thread_fd, &stack); + if (thread < 0) + return; + + register_winch_irq(thread_fd, fd, thread, tty, stack); + + count = write(thread_fd, &c, sizeof(c)); + if (count != sizeof(c)) + printk(UM_KERN_ERR "register_winch : failed to write " + "synchronization byte, err = %d\n", errno); + } +} diff --git a/arch/um/drivers/cow.h b/arch/um/drivers/cow.h new file mode 100644 index 0000000..dc36b22 --- /dev/null +++ b/arch/um/drivers/cow.h @@ -0,0 +1,67 @@ +#ifndef __COW_H__ +#define __COW_H__ + +#include <asm/types.h> + +#if defined(__KERNEL__) + +# include <asm/byteorder.h> + +# if defined(__BIG_ENDIAN) +# define ntohll(x) (x) +# define htonll(x) (x) +# elif defined(__LITTLE_ENDIAN) +# define ntohll(x) be64_to_cpu(x) +# define htonll(x) cpu_to_be64(x) +# else +# error "Could not determine byte order" +# endif + +#else +/* For the definition of ntohl, htonl and __BYTE_ORDER */ +#include <endian.h> +#include <netinet/in.h> +#if defined(__BYTE_ORDER) + +# if __BYTE_ORDER 
== __BIG_ENDIAN +# define ntohll(x) (x) +# define htonll(x) (x) +# elif __BYTE_ORDER == __LITTLE_ENDIAN +# define ntohll(x) bswap_64(x) +# define htonll(x) bswap_64(x) +# else +# error "Could not determine byte order: __BYTE_ORDER uncorrectly defined" +# endif + +#else /* ! defined(__BYTE_ORDER) */ +# error "Could not determine byte order: __BYTE_ORDER not defined" +#endif +#endif /* ! defined(__KERNEL__) */ + +extern int init_cow_file(int fd, char *cow_file, char *backing_file, + int sectorsize, int alignment, int *bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out); + +extern int file_reader(__u64 offset, char *buf, int len, void *arg); +extern int read_cow_header(int (*reader)(__u64, char *, int, void *), + void *arg, __u32 *version_out, + char **backing_file_out, time_t *mtime_out, + unsigned long long *size_out, int *sectorsize_out, + __u32 *align_out, int *bitmap_offset_out); + +extern int write_cow_header(char *cow_file, int fd, char *backing_file, + int sectorsize, int alignment, + unsigned long long *size); + +extern void cow_sizes(int version, __u64 size, int sectorsize, int align, + int bitmap_offset, unsigned long *bitmap_len_out, + int *data_offset_out); + +#endif + +/* + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/drivers/cow_sys.h b/arch/um/drivers/cow_sys.h new file mode 100644 index 0000000..f5701fd --- /dev/null +++ b/arch/um/drivers/cow_sys.h @@ -0,0 +1,41 @@ +#ifndef __COW_SYS_H__ +#define __COW_SYS_H__ + +#include "kern_util.h" +#include "os.h" +#include "user.h" +#include "um_malloc.h" + +static inline void *cow_malloc(int size) +{ + return uml_kmalloc(size, UM_GFP_KERNEL); +} + +static inline void cow_free(void *ptr) +{ + kfree(ptr); +} + +#define cow_printf printk + +static inline char *cow_strdup(char *str) +{ + return uml_strdup(str); +} + +static inline int cow_seek_file(int fd, __u64 offset) +{ + 
return os_seek_file(fd, offset); +} + +static inline int cow_file_size(char *file, unsigned long long *size_out) +{ + return os_file_size(file, size_out); +} + +static inline int cow_write_file(int fd, void *buf, int size) +{ + return os_write_file(fd, buf, size); +} + +#endif diff --git a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c new file mode 100644 index 0000000..93f227a --- /dev/null +++ b/arch/um/drivers/cow_user.c @@ -0,0 +1,440 @@ +/* + * Copyright (C) 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) + * Licensed under the GPL + */ + +/* + * _XOPEN_SOURCE is needed for pread, but we define _GNU_SOURCE, which defines + * that. + */ +#include <unistd.h> +#include <byteswap.h> +#include <errno.h> +#include <string.h> +#include <arpa/inet.h> +#include <asm/types.h> +#include "cow.h" +#include "cow_sys.h" + +#define PATH_LEN_V1 256 + +typedef __u32 time32_t; + +struct cow_header_v1 { + __s32 magic; + __s32 version; + char backing_file[PATH_LEN_V1]; + time32_t mtime; + __u64 size; + __s32 sectorsize; +} __attribute__((packed)); + +/* + * Define PATH_LEN_V3 as the usual value of MAXPATHLEN, just hard-code it in + * case other systems have different values for MAXPATHLEN. + * + * The same must hold for V2 - we want file format compatibility, not anything + * else. + */ +#define PATH_LEN_V3 4096 +#define PATH_LEN_V2 PATH_LEN_V3 + +struct cow_header_v2 { + __u32 magic; + __u32 version; + char backing_file[PATH_LEN_V2]; + time32_t mtime; + __u64 size; + __s32 sectorsize; +} __attribute__((packed)); + +/* + * Changes from V2 - + * PATH_LEN_V3 as described above + * Explicitly specify field bit lengths for systems with different + * lengths for the usual C types. 
Not sure whether char or + * time_t should be changed, this can be changed later without + * breaking compatibility + * Add alignment field so that different alignments can be used for the + * bitmap and data + * Add cow_format field to allow for the possibility of different ways + * of specifying the COW blocks. For now, the only value is 0, + * for the traditional COW bitmap. + * Move the backing_file field to the end of the header. This allows + * for the possibility of expanding it into the padding required + * by the bitmap alignment. + * The bitmap and data portions of the file will be aligned as specified + * by the alignment field. This is to allow COW files to be + * put on devices with restrictions on access alignments, such as + * /dev/raw, with a 512 byte alignment restriction. This also + * allows the data to be more aligned more strictly than on + * sector boundaries. This is needed for ubd-mmap, which needs + * the data to be page aligned. + * Fixed (finally!) the rounding bug + */ + +/* + * Until Dec2005, __attribute__((packed)) was left out from the below + * definition, leading on 64-bit systems to 4 bytes of padding after mtime, to + * align size to 8-byte alignment. This shifted all fields above (no padding + * was present on 32-bit, no other padding was added). + * + * However, this _can be detected_: it means that cow_format (always 0 until + * now) is shifted onto the first 4 bytes of backing_file, where it is otherwise + * impossible to find 4 zeros. -bb */ + +struct cow_header_v3 { + __u32 magic; + __u32 version; + __u32 mtime; + __u64 size; + __u32 sectorsize; + __u32 alignment; + __u32 cow_format; + char backing_file[PATH_LEN_V3]; +} __attribute__((packed)); + +/* This is the broken layout used by some 64-bit binaries. 
*/ +struct cow_header_v3_broken { + __u32 magic; + __u32 version; + __s64 mtime; + __u64 size; + __u32 sectorsize; + __u32 alignment; + __u32 cow_format; + char backing_file[PATH_LEN_V3]; +}; + +/* COW format definitions - for now, we have only the usual COW bitmap */ +#define COW_BITMAP 0 + +union cow_header { + struct cow_header_v1 v1; + struct cow_header_v2 v2; + struct cow_header_v3 v3; + struct cow_header_v3_broken v3_b; +}; + +#define COW_MAGIC 0x4f4f4f4d /* MOOO */ +#define COW_VERSION 3 + +#define DIV_ROUND(x, len) (((x) + (len) - 1) / (len)) +#define ROUND_UP(x, align) DIV_ROUND(x, align) * (align) + +void cow_sizes(int version, __u64 size, int sectorsize, int align, + int bitmap_offset, unsigned long *bitmap_len_out, + int *data_offset_out) +{ + if (version < 3) { + *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize); + + *data_offset_out = bitmap_offset + *bitmap_len_out; + *data_offset_out = (*data_offset_out + sectorsize - 1) / + sectorsize; + *data_offset_out *= sectorsize; + } + else { + *bitmap_len_out = DIV_ROUND(size, sectorsize); + *bitmap_len_out = DIV_ROUND(*bitmap_len_out, 8); + + *data_offset_out = bitmap_offset + *bitmap_len_out; + *data_offset_out = ROUND_UP(*data_offset_out, align); + } +} + +static int absolutize(char *to, int size, char *from) +{ + char save_cwd[256], *slash; + int remaining; + + if (getcwd(save_cwd, sizeof(save_cwd)) == NULL) { + cow_printf("absolutize : unable to get cwd - errno = %d\n", + errno); + return -1; + } + slash = strrchr(from, '/'); + if (slash != NULL) { + *slash = '\0'; + if (chdir(from)) { + *slash = '/'; + cow_printf("absolutize : Can't cd to '%s' - " + "errno = %d\n", from, errno); + return -1; + } + *slash = '/'; + if (getcwd(to, size) == NULL) { + cow_printf("absolutize : unable to get cwd of '%s' - " + "errno = %d\n", from, errno); + return -1; + } + remaining = size - strlen(to); + if (strlen(slash) + 1 > remaining) { + cow_printf("absolutize : unable to fit '%s' into %d " + "chars\n", 
from, size); + return -1; + } + strcat(to, slash); + } + else { + if (strlen(save_cwd) + 1 + strlen(from) + 1 > size) { + cow_printf("absolutize : unable to fit '%s' into %d " + "chars\n", from, size); + return -1; + } + strcpy(to, save_cwd); + strcat(to, "/"); + strcat(to, from); + } + chdir(save_cwd); + return 0; +} + +int write_cow_header(char *cow_file, int fd, char *backing_file, + int sectorsize, int alignment, unsigned long long *size) +{ + struct cow_header_v3 *header; + unsigned long modtime; + int err; + + err = cow_seek_file(fd, 0); + if (err < 0) { + cow_printf("write_cow_header - lseek failed, err = %d\n", -err); + goto out; + } + + err = -ENOMEM; + header = cow_malloc(sizeof(*header)); + if (header == NULL) { + cow_printf("write_cow_header - failed to allocate COW V3 " + "header\n"); + goto out; + } + header->magic = htonl(COW_MAGIC); + header->version = htonl(COW_VERSION); + + err = -EINVAL; + if (strlen(backing_file) > sizeof(header->backing_file) - 1) { + /* Below, %zd is for a size_t value */ + cow_printf("Backing file name \"%s\" is too long - names are " + "limited to %zd characters\n", backing_file, + sizeof(header->backing_file) - 1); + goto out_free; + } + + if (absolutize(header->backing_file, sizeof(header->backing_file), + backing_file)) + goto out_free; + + err = os_file_modtime(header->backing_file, &modtime); + if (err < 0) { + cow_printf("write_cow_header - backing file '%s' mtime " + "request failed, err = %d\n", header->backing_file, + -err); + goto out_free; + } + + err = cow_file_size(header->backing_file, size); + if (err < 0) { + cow_printf("write_cow_header - couldn't get size of " + "backing file '%s', err = %d\n", + header->backing_file, -err); + goto out_free; + } + + header->mtime = htonl(modtime); + header->size = htonll(*size); + header->sectorsize = htonl(sectorsize); + header->alignment = htonl(alignment); + header->cow_format = COW_BITMAP; + + err = cow_write_file(fd, header, sizeof(*header)); + if (err != 
sizeof(*header)) { + cow_printf("write_cow_header - write of header to " + "new COW file '%s' failed, err = %d\n", cow_file, + -err); + goto out_free; + } + err = 0; + out_free: + cow_free(header); + out: + return err; +} + +int file_reader(__u64 offset, char *buf, int len, void *arg) +{ + int fd = *((int *) arg); + + return pread(fd, buf, len, offset); +} + +/* XXX Need to sanity-check the values read from the header */ + +int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, + __u32 *version_out, char **backing_file_out, + time_t *mtime_out, unsigned long long *size_out, + int *sectorsize_out, __u32 *align_out, + int *bitmap_offset_out) +{ + union cow_header *header; + char *file; + int err, n; + unsigned long version, magic; + + header = cow_malloc(sizeof(*header)); + if (header == NULL) { + cow_printf("read_cow_header - Failed to allocate header\n"); + return -ENOMEM; + } + err = -EINVAL; + n = (*reader)(0, (char *) header, sizeof(*header), arg); + if (n < offsetof(typeof(header->v1), backing_file)) { + cow_printf("read_cow_header - short header\n"); + goto out; + } + + magic = header->v1.magic; + if (magic == COW_MAGIC) + version = header->v1.version; + else if (magic == ntohl(COW_MAGIC)) + version = ntohl(header->v1.version); + /* No error printed because the non-COW case comes through here */ + else goto out; + + *version_out = version; + + if (version == 1) { + if (n < sizeof(header->v1)) { + cow_printf("read_cow_header - failed to read V1 " + "header\n"); + goto out; + } + *mtime_out = header->v1.mtime; + *size_out = header->v1.size; + *sectorsize_out = header->v1.sectorsize; + *bitmap_offset_out = sizeof(header->v1); + *align_out = *sectorsize_out; + file = header->v1.backing_file; + } + else if (version == 2) { + if (n < sizeof(header->v2)) { + cow_printf("read_cow_header - failed to read V2 " + "header\n"); + goto out; + } + *mtime_out = ntohl(header->v2.mtime); + *size_out = ntohll(header->v2.size); + *sectorsize_out = 
ntohl(header->v2.sectorsize); + *bitmap_offset_out = sizeof(header->v2); + *align_out = *sectorsize_out; + file = header->v2.backing_file; + } + /* This is very subtle - see above at union cow_header definition */ + else if (version == 3 && (*((int*)header->v3.backing_file) != 0)) { + if (n < sizeof(header->v3)) { + cow_printf("read_cow_header - failed to read V3 " + "header\n"); + goto out; + } + *mtime_out = ntohl(header->v3.mtime); + *size_out = ntohll(header->v3.size); + *sectorsize_out = ntohl(header->v3.sectorsize); + *align_out = ntohl(header->v3.alignment); + if (*align_out == 0) { + cow_printf("read_cow_header - invalid COW header, " + "align == 0\n"); + /* ROUND_UP below divides by the alignment */ + goto out; + } + *bitmap_offset_out = ROUND_UP(sizeof(header->v3), *align_out); + file = header->v3.backing_file; + } + else if (version == 3) { + cow_printf("read_cow_header - broken V3 file with" + " 64-bit layout - recovering content.\n"); + + if (n < sizeof(header->v3_b)) { + cow_printf("read_cow_header - failed to read V3 " + "header\n"); + goto out; + } + + /* + * this was used until Dec2005 - 64bits are needed to represent + * 2038+. I.e. we can safely do this truncating cast. + * + * Additionally, we must use ntohl() instead of ntohll(), since + * the program used to use the former (tested - I got mtime + * mismatch "0 vs whatever"). + * + * Ever heard about bug-to-bug-compatibility ?
;-) */ + *mtime_out = (time32_t) ntohl(header->v3_b.mtime); + + *size_out = ntohll(header->v3_b.size); + *sectorsize_out = ntohl(header->v3_b.sectorsize); + *align_out = ntohl(header->v3_b.alignment); + if (*align_out == 0) { + cow_printf("read_cow_header - invalid COW header, " + "align == 0\n"); + /* ROUND_UP below divides by the alignment */ + goto out; + } + *bitmap_offset_out = ROUND_UP(sizeof(header->v3_b), *align_out); + file = header->v3_b.backing_file; + } + else { + cow_printf("read_cow_header - invalid COW version\n"); + goto out; + } + err = -ENOMEM; + *backing_file_out = cow_strdup(file); + if (*backing_file_out == NULL) { + cow_printf("read_cow_header - failed to allocate backing " + "file\n"); + goto out; + } + err = 0; + out: + cow_free(header); + return err; +} + +int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize, + int alignment, int *bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out) +{ + unsigned long long size, offset; + char zero = 0; + int err; + + err = write_cow_header(cow_file, fd, backing_file, sectorsize, + alignment, &size); + if (err) + goto out; + + *bitmap_offset_out = ROUND_UP(sizeof(struct cow_header_v3), alignment); + cow_sizes(COW_VERSION, size, sectorsize, alignment, *bitmap_offset_out, + bitmap_len_out, data_offset_out); + + offset = *data_offset_out + size - sizeof(zero); + err = cow_seek_file(fd, offset); + if (err < 0) { + cow_printf("cow bitmap lseek failed : err = %d\n", -err); + goto out; + } + + /* + * does not really matter how much we write it is just to set EOF + * this also sets the entire COW bitmap + * to zero without having to allocate it + */ + err = cow_write_file(fd, &zero, sizeof(zero)); + if (err != sizeof(zero)) { + cow_printf("Write of bitmap to new COW file '%s' failed, " + "err = %d\n", cow_file, -err); + if (err >= 0) + err = -EINVAL; + goto out; + } + + return 0; + out: + return err; +} diff --git a/arch/um/drivers/daemon.h b/arch/um/drivers/daemon.h new file mode 100644 index 0000000..6e0e891 --- /dev/null +++
b/arch/um/drivers/daemon.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __DAEMON_H__ +#define __DAEMON_H__ + +#include "net_user.h" + +#define SWITCH_VERSION 3 + +struct daemon_data { + char *sock_type; + char *ctl_sock; + void *ctl_addr; + void *data_addr; + void *local_addr; + int fd; + int control; + void *dev; +}; + +extern const struct net_user_info daemon_user_info; + +extern int daemon_user_write(int fd, void *buf, int len, + struct daemon_data *pri); + +#endif diff --git a/arch/um/drivers/daemon_kern.c b/arch/um/drivers/daemon_kern.c new file mode 100644 index 0000000..d53ff52 --- /dev/null +++ b/arch/um/drivers/daemon_kern.c @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 by various other people who didn't put their name here. + * Licensed under the GPL. + */ + +#include "linux/init.h" +#include <linux/netdevice.h> +#include "net_kern.h" +#include "daemon.h" + +struct daemon_init { + char *sock_type; + char *ctl_sock; +}; + +static void daemon_init(struct net_device *dev, void *data) +{ + struct uml_net_private *pri; + struct daemon_data *dpri; + struct daemon_init *init = data; + + pri = dev->priv; + dpri = (struct daemon_data *) pri->user; + dpri->sock_type = init->sock_type; + dpri->ctl_sock = init->ctl_sock; + dpri->fd = -1; + dpri->control = -1; + dpri->dev = dev; + /* We will free this pointer. If it contains crap we're burned. 
*/ + dpri->ctl_addr = NULL; + dpri->data_addr = NULL; + dpri->local_addr = NULL; + + printk("daemon backend (uml_switch version %d) - %s:%s", + SWITCH_VERSION, dpri->sock_type, dpri->ctl_sock); + printk("\n"); +} + +static int daemon_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return net_recvfrom(fd, skb_mac_header(skb), + skb->dev->mtu + ETH_HEADER_OTHER); +} + +static int daemon_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return daemon_user_write(fd, skb->data, skb->len, + (struct daemon_data *) &lp->user); +} + +static const struct net_kern_info daemon_kern_info = { + .init = daemon_init, + .protocol = eth_protocol, + .read = daemon_read, + .write = daemon_write, +}; + +static int daemon_setup(char *str, char **mac_out, void *data) +{ + struct daemon_init *init = data; + char *remain; + + *init = ((struct daemon_init) + { .sock_type = "unix", + .ctl_sock = "/tmp/uml.ctl" }); + + remain = split_if_spec(str, mac_out, &init->sock_type, &init->ctl_sock, + NULL); + if (remain != NULL) + printk(KERN_WARNING "daemon_setup : Ignoring data socket " + "specification\n"); + + return 1; +} + +static struct transport daemon_transport = { + .list = LIST_HEAD_INIT(daemon_transport.list), + .name = "daemon", + .setup = daemon_setup, + .user = &daemon_user_info, + .kern = &daemon_kern_info, + .private_size = sizeof(struct daemon_data), + .setup_size = sizeof(struct daemon_init), +}; + +static int register_daemon(void) +{ + register_transport(&daemon_transport); + return 0; +} + +late_initcall(register_daemon); diff --git a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c new file mode 100644 index 0000000..f8e85e0 --- /dev/null +++ b/arch/um/drivers/daemon_user.c @@ -0,0 +1,194 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). 
+ * Copyright (C) 2001 by various other people who didn't put their name here.
+ * Licensed under the GPL.
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/un.h>
+#include "daemon.h"
+#include "net_user.h"
+#include "os.h"
+#include "um_malloc.h"
+#include "user.h"
+
+enum request_type { REQ_NEW_CONTROL };
+
+#define SWITCH_MAGIC 0xfeedface
+
+struct request_v3 {
+	uint32_t magic;
+	uint32_t version;
+	enum request_type type;
+	struct sockaddr_un sock;
+};
+
+/*
+ * Allocate a sockaddr_un of family AF_UNIX whose sun_path starts with the
+ * first len bytes of name.
+ *
+ * The structure is zeroed before use: connect_to_switch() below hands the
+ * whole sizeof(struct sockaddr_un) to connect() and bind(), so the bytes
+ * after the copied name would otherwise be uninitialized heap contents.
+ *
+ * Returns the new address, or NULL if len does not fit in sun_path or the
+ * allocation fails.
+ */
+static struct sockaddr_un *new_addr(void *name, int len)
+{
+	struct sockaddr_un *sun;
+
+	/* name can come from the command line; never write past sun_path */
+	if ((len < 0) || ((size_t) len > sizeof(sun->sun_path))) {
+		printk(UM_KERN_ERR "new_addr: socket name of %d bytes does "
+		       "not fit in sun_path\n", len);
+		return NULL;
+	}
+
+	sun = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL);
+	if (sun == NULL) {
+		printk(UM_KERN_ERR "new_addr: allocation of sockaddr_un "
+		       "failed\n");
+		return NULL;
+	}
+	memset(sun, 0, sizeof(*sun));
+	sun->sun_family = AF_UNIX;
+	memcpy(sun->sun_path, name, len);
+	return sun;
+}
+
+static int connect_to_switch(struct daemon_data *pri)
+{
+	struct sockaddr_un *ctl_addr = pri->ctl_addr;
+	struct sockaddr_un *local_addr = pri->local_addr;
+	struct sockaddr_un *sun;
+	struct request_v3 req;
+	int fd, n, err;
+
+	pri->control = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (pri->control < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "daemon_open : control socket failed, "
+		       "errno = %d\n", -err);
+		return err;
+	}
+
+	if (connect(pri->control, (struct sockaddr *) ctl_addr,
+		    sizeof(*ctl_addr)) < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "daemon_open : control connect failed, "
+		       "errno = %d\n", -err);
+		goto out;
+	}
+
+	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (fd < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "daemon_open : data socket failed, "
+		       "errno = %d\n", -err);
+		goto out;
+	}
+	if (bind(fd, (struct sockaddr *) local_addr, sizeof(*local_addr)) < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "daemon_open : data bind failed, "
+		       "errno = %d\n", -err);
+		goto out_close;
+	}
+
+	sun = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL);
+	if (sun 
== NULL) { + printk(UM_KERN_ERR "new_addr: allocation of sockaddr_un " + "failed\n"); + err = -ENOMEM; + goto out_close; + } + + req.magic = SWITCH_MAGIC; + req.version = SWITCH_VERSION; + req.type = REQ_NEW_CONTROL; + req.sock = *local_addr; + n = write(pri->control, &req, sizeof(req)); + if (n != sizeof(req)) { + printk(UM_KERN_ERR "daemon_open : control setup request " + "failed, err = %d\n", -errno); + err = -ENOTCONN; + goto out_free; + } + + n = read(pri->control, sun, sizeof(*sun)); + if (n != sizeof(*sun)) { + printk(UM_KERN_ERR "daemon_open : read of data socket failed, " + "err = %d\n", -errno); + err = -ENOTCONN; + goto out_free; + } + + pri->data_addr = sun; + return fd; + + out_free: + kfree(sun); + out_close: + close(fd); + out: + close(pri->control); + return err; +} + +static int daemon_user_init(void *data, void *dev) +{ + struct daemon_data *pri = data; + struct timeval tv; + struct { + char zero; + int pid; + int usecs; + } name; + + if (!strcmp(pri->sock_type, "unix")) + pri->ctl_addr = new_addr(pri->ctl_sock, + strlen(pri->ctl_sock) + 1); + name.zero = 0; + name.pid = os_getpid(); + gettimeofday(&tv, NULL); + name.usecs = tv.tv_usec; + pri->local_addr = new_addr(&name, sizeof(name)); + pri->dev = dev; + pri->fd = connect_to_switch(pri); + if (pri->fd < 0) { + kfree(pri->local_addr); + pri->local_addr = NULL; + return pri->fd; + } + + return 0; +} + +static int daemon_open(void *data) +{ + struct daemon_data *pri = data; + return pri->fd; +} + +static void daemon_remove(void *data) +{ + struct daemon_data *pri = data; + + close(pri->fd); + pri->fd = -1; + close(pri->control); + pri->control = -1; + + kfree(pri->data_addr); + pri->data_addr = NULL; + kfree(pri->ctl_addr); + pri->ctl_addr = NULL; + kfree(pri->local_addr); + pri->local_addr = NULL; +} + +int daemon_user_write(int fd, void *buf, int len, struct daemon_data *pri) +{ + struct sockaddr_un *data_addr = pri->data_addr; + + return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr)); 
+} + +const struct net_user_info daemon_user_info = { + .init = daemon_user_init, + .open = daemon_open, + .close = NULL, + .remove = daemon_remove, + .add_address = NULL, + .delete_address = NULL, + .mtu = ETH_MAX_PACKET, + .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, +}; diff --git a/arch/um/drivers/fd.c b/arch/um/drivers/fd.c new file mode 100644 index 0000000..f5a981a --- /dev/null +++ b/arch/um/drivers/fd.c @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <termios.h> +#include "chan_user.h" +#include "kern_constants.h" +#include "os.h" +#include "um_malloc.h" +#include "user.h" + +struct fd_chan { + int fd; + int raw; + struct termios tt; + char str[sizeof("1234567890\0")]; +}; + +static void *fd_init(char *str, int device, const struct chan_opts *opts) +{ + struct fd_chan *data; + char *end; + int n; + + if (*str != ':') { + printk(UM_KERN_ERR "fd_init : channel type 'fd' must specify a " + "file descriptor\n"); + return NULL; + } + str++; + n = strtoul(str, &end, 0); + if ((*end != '\0') || (end == str)) { + printk(UM_KERN_ERR "fd_init : couldn't parse file descriptor " + "'%s'\n", str); + return NULL; + } + + data = uml_kmalloc(sizeof(*data), UM_GFP_KERNEL); + if (data == NULL) + return NULL; + + *data = ((struct fd_chan) { .fd = n, + .raw = opts->raw }); + return data; +} + +static int fd_open(int input, int output, int primary, void *d, char **dev_out) +{ + struct fd_chan *data = d; + int err; + + if (data->raw && isatty(data->fd)) { + CATCH_EINTR(err = tcgetattr(data->fd, &data->tt)); + if (err) + return err; + + err = raw(data->fd); + if (err) + return err; + } + sprintf(data->str, "%d", data->fd); + *dev_out = data->str; + return data->fd; +} + +static void fd_close(int fd, void *d) +{ + struct fd_chan *data = d; + int err; + + if (!data->raw || !isatty(fd)) + return; + + 
CATCH_EINTR(err = tcsetattr(fd, TCSAFLUSH, &data->tt)); + if (err) + printk(UM_KERN_ERR "Failed to restore terminal state - " + "errno = %d\n", -err); + data->raw = 0; +} + +const struct chan_ops fd_ops = { + .type = "fd", + .init = fd_init, + .open = fd_open, + .close = fd_close, + .read = generic_read, + .write = generic_write, + .console_write = generic_console_write, + .window_size = generic_window_size, + .free = generic_free, + .winch = 1, +}; diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c new file mode 100644 index 0000000..d332503 --- /dev/null +++ b/arch/um/drivers/harddog_kern.c @@ -0,0 +1,187 @@ +/* UML hardware watchdog, shamelessly stolen from: + * + * SoftDog 0.05: A Software Watchdog Device + * + * (c) Copyright 1996 Alan Cox <alan@redhat.com>, All Rights Reserved. + * http://www.redhat.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Neither Alan Cox nor CymruNet Ltd. admit liability nor provide + * warranty for any of this software. This material is provided + * "AS-IS" and at no charge. + * + * (c) Copyright 1995 Alan Cox <alan@lxorguk.ukuu.org.uk> + * + * Software only watchdog driver. Unlike its big brother the WDT501P + * driver this won't always recover a failed machine. + * + * 03/96: Angelo Haritsis <ah@doc.ic.ac.uk> : + * Modularised. + * Added soft_margin; use upon insmod to change the timer delay. + * NB: uses same minor as wdt (WATCHDOG_MINOR); we could use separate + * minors. + * + * 19980911 Alan Cox + * Made SMP safe for 2.3.x + * + * 20011127 Joel Becker (jlbec@evilplan.org> + * Added soft_noboot; Allows testing the softdog trigger without + * requiring a recompile. + * Added WDIOC_GETTIMEOUT and WDIOC_SETTIMOUT. 
+ */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/miscdevice.h> +#include <linux/watchdog.h> +#include <linux/reboot.h> +#include <linux/smp_lock.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <asm/uaccess.h> +#include "mconsole.h" + +MODULE_LICENSE("GPL"); + +static DEFINE_SPINLOCK(lock); +static int timer_alive; +static int harddog_in_fd = -1; +static int harddog_out_fd = -1; + +/* + * Allow only one person to hold it open + */ + +extern int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock); + +static int harddog_open(struct inode *inode, struct file *file) +{ + int err = -EBUSY; + char *sock = NULL; + + lock_kernel(); + spin_lock(&lock); + if(timer_alive) + goto err; +#ifdef CONFIG_WATCHDOG_NOWAYOUT + __module_get(THIS_MODULE); +#endif + +#ifdef CONFIG_MCONSOLE + sock = mconsole_notify_socket(); +#endif + err = start_watchdog(&harddog_in_fd, &harddog_out_fd, sock); + if(err) + goto err; + + timer_alive = 1; + spin_unlock(&lock); + unlock_kernel(); + return nonseekable_open(inode, file); +err: + spin_unlock(&lock); + unlock_kernel(); + return err; +} + +extern void stop_watchdog(int in_fd, int out_fd); + +static int harddog_release(struct inode *inode, struct file *file) +{ + /* + * Shut off the timer. + */ + + spin_lock(&lock); + + stop_watchdog(harddog_in_fd, harddog_out_fd); + harddog_in_fd = -1; + harddog_out_fd = -1; + + timer_alive=0; + spin_unlock(&lock); + + return 0; +} + +extern int ping_watchdog(int fd); + +static ssize_t harddog_write(struct file *file, const char __user *data, size_t len, + loff_t *ppos) +{ + /* + * Refresh the timer. 
+ */ + if(len) + return ping_watchdog(harddog_out_fd); + return 0; +} + +static int harddog_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + void __user *argp= (void __user *)arg; + static struct watchdog_info ident = { + WDIOC_SETTIMEOUT, + 0, + "UML Hardware Watchdog" + }; + switch (cmd) { + default: + return -ENOTTY; + case WDIOC_GETSUPPORT: + if(copy_to_user(argp, &ident, sizeof(ident))) + return -EFAULT; + return 0; + case WDIOC_GETSTATUS: + case WDIOC_GETBOOTSTATUS: + return put_user(0,(int __user *)argp); + case WDIOC_KEEPALIVE: + return ping_watchdog(harddog_out_fd); + } +} + +static const struct file_operations harddog_fops = { + .owner = THIS_MODULE, + .write = harddog_write, + .ioctl = harddog_ioctl, + .open = harddog_open, + .release = harddog_release, +}; + +static struct miscdevice harddog_miscdev = { + .minor = WATCHDOG_MINOR, + .name = "watchdog", + .fops = &harddog_fops, +}; + +static char banner[] __initdata = KERN_INFO "UML Watchdog Timer\n"; + +static int __init harddog_init(void) +{ + int ret; + + ret = misc_register(&harddog_miscdev); + + if (ret) + return ret; + + printk(banner); + + return 0; +} + +static void __exit harddog_exit(void) +{ + misc_deregister(&harddog_miscdev); +} + +module_init(harddog_init); +module_exit(harddog_exit); diff --git a/arch/um/drivers/harddog_user.c b/arch/um/drivers/harddog_user.c new file mode 100644 index 0000000..b56f8e0 --- /dev/null +++ b/arch/um/drivers/harddog_user.c @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include "os.h" +#include "user.h" + +struct dog_data { + int stdin; + int stdout; + int close_me[2]; +}; + +static void pre_exec(void *d) +{ + struct dog_data *data = d; + + dup2(data->stdin, 0); + dup2(data->stdout, 1); + dup2(data->stdout, 2); + close(data->stdin); + close(data->stdout); + 
close(data->close_me[0]);
+	close(data->close_me[1]);
+}
+
+/*
+ * Start the host-side uml_watchdog helper and wait for its first byte.
+ *
+ * Two pipes connect us to the helper: in_fds carries what the helper
+ * writes to its stdout/stderr, out_fds feeds its stdin. If sock is
+ * non-NULL the helper is run with "-mconsole <sock>", otherwise with
+ * "-pid <our pid>".
+ *
+ * On success returns 0 and stores our read end in *in_fd_ret and our
+ * write end in *out_fd_ret. On failure returns a negative value and
+ * closes every descriptor this function opened.
+ */
+int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock)
+{
+	struct dog_data data;
+	int in_fds[2], out_fds[2], pid, n, err;
+	/* Room for any int, so a pid of more than 6 digits cannot overflow */
+	char pid_buf[sizeof("-2147483648")], c;
+	char *pid_args[] = { "/usr/bin/uml_watchdog", "-pid", pid_buf, NULL };
+	char *mconsole_args[] = { "/usr/bin/uml_watchdog", "-mconsole", NULL,
+				  NULL };
+	char **args = NULL;
+
+	err = os_pipe(in_fds, 1, 0);
+	if (err < 0) {
+		printk("harddog_open - os_pipe failed, err = %d\n", -err);
+		goto out;
+	}
+
+	err = os_pipe(out_fds, 1, 0);
+	if (err < 0) {
+		printk("harddog_open - os_pipe failed, err = %d\n", -err);
+		goto out_close_in;
+	}
+
+	data.stdin = out_fds[0];
+	data.stdout = in_fds[1];
+	data.close_me[0] = out_fds[1];
+	data.close_me[1] = in_fds[0];
+
+	if (sock != NULL) {
+		mconsole_args[2] = sock;
+		args = mconsole_args;
+	}
+	else {
+		/* XXX The os_getpid() is not SMP correct */
+		snprintf(pid_buf, sizeof(pid_buf), "%d", os_getpid());
+		args = pid_args;
+	}
+
+	pid = run_helper(pre_exec, &data, args);
+
+	/* These ends belong to the helper; they are closed exactly once */
+	close(out_fds[0]);
+	close(in_fds[1]);
+
+	if (pid < 0) {
+		/*
+		 * Keep the failure negative like every other error return
+		 * here. NOTE(review): assumes run_helper() reports -errno.
+		 */
+		err = pid;
+		printk("harddog_open - run_helper failed, errno = %d\n", -err);
+		goto out_close_ours;
+	}
+
+	n = read(in_fds[0], &c, sizeof(c));
+	if (n == 0) {
+		printk("harddog_open - EOF on watchdog pipe\n");
+		helper_wait(pid);
+		err = -EIO;
+		goto out_close_ours;
+	}
+	else if (n < 0) {
+		/* Save errno before any later call can overwrite it */
+		err = -errno;
+		printk("harddog_open - read of watchdog pipe failed, "
+		       "err = %d\n", -err);
+		helper_wait(pid);
+		goto out_close_ours;
+	}
+	*in_fd_ret = in_fds[0];
+	*out_fd_ret = out_fds[1];
+	return 0;
+
+ out_close_ours:
+	/* The helper's ends were already closed right after run_helper() */
+	close(in_fds[0]);
+	close(out_fds[1]);
+	return err;
+ out_close_in:
+	/* Only reached when the second os_pipe() failed: out_fds is unset */
+	close(in_fds[0]);
+	close(in_fds[1]);
+ out:
+	return err;
+}
+
+/* Close both of our pipe ends to the watchdog helper. */
+void stop_watchdog(int in_fd, int out_fd)
+{
+	close(in_fd);
+	close(out_fd);
+}
+
+int ping_watchdog(int fd)
+{
+	int n;
+	char c = '\n';
+
+	n = write(fd, &c, sizeof(c));
+	if (n != sizeof(c)) {
+		printk("ping_watchdog - write failed, ret = %d, err = %d\n",
+		       
n, errno); + if (n < 0) + return n; + return -EIO; + } + return 1; + +} diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c new file mode 100644 index 0000000..368219c --- /dev/null +++ b/arch/um/drivers/hostaudio_kern.c @@ -0,0 +1,344 @@ +/* + * Copyright (C) 2002 Steve Schmidtke + * Licensed under the GPL + */ + +#include "linux/fs.h" +#include "linux/module.h" +#include "linux/slab.h" +#include "linux/sound.h" +#include "linux/soundcard.h" +#include "asm/uaccess.h" +#include "init.h" +#include "os.h" + +struct hostaudio_state { + int fd; +}; + +struct hostmixer_state { + int fd; +}; + +#define HOSTAUDIO_DEV_DSP "/dev/sound/dsp" +#define HOSTAUDIO_DEV_MIXER "/dev/sound/mixer" + +/* + * Changed either at boot time or module load time. At boot, this is + * single-threaded; at module load, multiple modules would each have + * their own copy of these variables. + */ +static char *dsp = HOSTAUDIO_DEV_DSP; +static char *mixer = HOSTAUDIO_DEV_MIXER; + +#define DSP_HELP \ +" This is used to specify the host dsp device to the hostaudio driver.\n" \ +" The default is \"" HOSTAUDIO_DEV_DSP "\".\n\n" + +#define MIXER_HELP \ +" This is used to specify the host mixer device to the hostaudio driver.\n"\ +" The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n" + +#ifndef MODULE +static int set_dsp(char *name, int *add) +{ + dsp = name; + return 0; +} + +__uml_setup("dsp=", set_dsp, "dsp=<dsp device>\n" DSP_HELP); + +static int set_mixer(char *name, int *add) +{ + mixer = name; + return 0; +} + +__uml_setup("mixer=", set_mixer, "mixer=<mixer device>\n" MIXER_HELP); + +#else /*MODULE*/ + +module_param(dsp, charp, 0644); +MODULE_PARM_DESC(dsp, DSP_HELP); + +module_param(mixer, charp, 0644); +MODULE_PARM_DESC(mixer, MIXER_HELP); + +#endif + +/* /dev/dsp file operations */ + +static ssize_t hostaudio_read(struct file *file, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct hostaudio_state *state = file->private_data; + void *kbuf; + int err; + 
+#ifdef DEBUG + printk(KERN_DEBUG "hostaudio: read called, count = %d\n", count); +#endif + + kbuf = kmalloc(count, GFP_KERNEL); + if (kbuf == NULL) + return -ENOMEM; + + err = os_read_file(state->fd, kbuf, count); + if (err < 0) + goto out; + + if (copy_to_user(buffer, kbuf, err)) + err = -EFAULT; + +out: + kfree(kbuf); + return err; +} + +static ssize_t hostaudio_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct hostaudio_state *state = file->private_data; + void *kbuf; + int err; + +#ifdef DEBUG + printk(KERN_DEBUG "hostaudio: write called, count = %d\n", count); +#endif + + kbuf = kmalloc(count, GFP_KERNEL); + if (kbuf == NULL) + return -ENOMEM; + + err = -EFAULT; + if (copy_from_user(kbuf, buffer, count)) + goto out; + + err = os_write_file(state->fd, kbuf, count); + if (err < 0) + goto out; + *ppos += err; + + out: + kfree(kbuf); + return err; +} + +static unsigned int hostaudio_poll(struct file *file, + struct poll_table_struct *wait) +{ + unsigned int mask = 0; + +#ifdef DEBUG + printk(KERN_DEBUG "hostaudio: poll called (unimplemented)\n"); +#endif + + return mask; +} + +static int hostaudio_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct hostaudio_state *state = file->private_data; + unsigned long data = 0; + int err; + +#ifdef DEBUG + printk(KERN_DEBUG "hostaudio: ioctl called, cmd = %u\n", cmd); +#endif + switch(cmd){ + case SNDCTL_DSP_SPEED: + case SNDCTL_DSP_STEREO: + case SNDCTL_DSP_GETBLKSIZE: + case SNDCTL_DSP_CHANNELS: + case SNDCTL_DSP_SUBDIVIDE: + case SNDCTL_DSP_SETFRAGMENT: + if (get_user(data, (int __user *) arg)) + return -EFAULT; + break; + default: + break; + } + + err = os_ioctl_generic(state->fd, cmd, (unsigned long) &data); + + switch(cmd){ + case SNDCTL_DSP_SPEED: + case SNDCTL_DSP_STEREO: + case SNDCTL_DSP_GETBLKSIZE: + case SNDCTL_DSP_CHANNELS: + case SNDCTL_DSP_SUBDIVIDE: + case SNDCTL_DSP_SETFRAGMENT: + if (put_user(data, (int __user *) 
arg))
+			return -EFAULT;
+		break;
+	default:
+		break;
+	}
+
+	return err;
+}
+
+static int hostaudio_open(struct inode *inode, struct file *file)
+{
+	struct hostaudio_state *state;
+	int r = 0, w = 0;
+	int ret;
+
+#ifdef DEBUG
+	printk(KERN_DEBUG "hostaudio: open called (host: %s)\n", dsp);
+#endif
+
+	state = kmalloc(sizeof(struct hostaudio_state), GFP_KERNEL);
+	if (state == NULL)
+		return -ENOMEM;
+
+	if (file->f_mode & FMODE_READ)
+		r = 1;
+	if (file->f_mode & FMODE_WRITE)
+		w = 1;
+
+	ret = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0);
+	if (ret < 0) {
+		kfree(state);
+		return ret;
+	}
+	state->fd = ret;
+	file->private_data = state;
+	return 0;
+}
+
+static int hostaudio_release(struct inode *inode, struct file *file)
+{
+	struct hostaudio_state *state = file->private_data;
+
+#ifdef DEBUG
+	printk(KERN_DEBUG "hostaudio: release called\n");
+#endif
+	os_close_file(state->fd);
+	kfree(state);
+
+	return 0;
+}
+
+/* /dev/mixer file operations */
+
+static int hostmixer_ioctl_mixdev(struct inode *inode, struct file *file,
+				  unsigned int cmd, unsigned long arg)
+{
+	struct hostmixer_state *state = file->private_data;
+
+#ifdef DEBUG
+	printk(KERN_DEBUG "hostmixer: ioctl called\n");
+#endif
+
+	return os_ioctl_generic(state->fd, cmd, arg);
+}
+
+/*
+ * Open the host mixer device named by 'mixer' with the access mode of
+ * 'file' and attach a freshly allocated hostmixer_state to the file.
+ *
+ * The host descriptor is recorded in state->fd because
+ * hostmixer_ioctl_mixdev() and hostmixer_release() both read it back.
+ *
+ * Returns 0 on success, -ENOMEM if the state cannot be allocated, or the
+ * negative value returned by os_open_file().
+ */
+static int hostmixer_open_mixdev(struct inode *inode, struct file *file)
+{
+	struct hostmixer_state *state;
+	int r = 0, w = 0;
+	int ret;
+
+#ifdef DEBUG
+	printk(KERN_DEBUG "hostmixer: open called (host: %s)\n", mixer);
+#endif
+
+	state = kmalloc(sizeof(struct hostmixer_state), GFP_KERNEL);
+	if (state == NULL)
+		return -ENOMEM;
+
+	if (file->f_mode & FMODE_READ)
+		r = 1;
+	if (file->f_mode & FMODE_WRITE)
+		w = 1;
+
+	ret = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0);
+
+	if (ret < 0) {
+		/* Report the device we actually tried to open */
+		printk(KERN_ERR "hostaudio_open_mixdev failed to open '%s', "
+		       "err = %d\n", mixer, -ret);
+		kfree(state);
+		return ret;
+	}
+
+	state->fd = ret;
+	file->private_data = state;
+	return 0;
+}
+
+static int hostmixer_release(struct inode 
*inode, struct file *file) +{ + struct hostmixer_state *state = file->private_data; + +#ifdef DEBUG + printk(KERN_DEBUG "hostmixer: release called\n"); +#endif + + os_close_file(state->fd); + kfree(state); + + return 0; +} + +/* kernel module operations */ + +static const struct file_operations hostaudio_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = hostaudio_read, + .write = hostaudio_write, + .poll = hostaudio_poll, + .ioctl = hostaudio_ioctl, + .mmap = NULL, + .open = hostaudio_open, + .release = hostaudio_release, +}; + +static const struct file_operations hostmixer_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .ioctl = hostmixer_ioctl_mixdev, + .open = hostmixer_open_mixdev, + .release = hostmixer_release, +}; + +struct { + int dev_audio; + int dev_mixer; +} module_data; + +MODULE_AUTHOR("Steve Schmidtke"); +MODULE_DESCRIPTION("UML Audio Relay"); +MODULE_LICENSE("GPL"); + +static int __init hostaudio_init_module(void) +{ + printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n", + dsp, mixer); + + module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1); + if (module_data.dev_audio < 0) { + printk(KERN_ERR "hostaudio: couldn't register DSP device!\n"); + return -ENODEV; + } + + module_data.dev_mixer = register_sound_mixer(&hostmixer_fops, -1); + if (module_data.dev_mixer < 0) { + printk(KERN_ERR "hostmixer: couldn't register mixer " + "device!\n"); + unregister_sound_dsp(module_data.dev_audio); + return -ENODEV; + } + + return 0; +} + +static void __exit hostaudio_cleanup_module (void) +{ + unregister_sound_mixer(module_data.dev_mixer); + unregister_sound_dsp(module_data.dev_audio); +} + +module_init(hostaudio_init_module); +module_exit(hostaudio_cleanup_module); diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c new file mode 100644 index 0000000..14a102e --- /dev/null +++ b/arch/um/drivers/line.c @@ -0,0 +1,871 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * 
Licensed under the GPL
+ */
+
+#include "linux/irqreturn.h"
+#include "linux/kd.h"
+#include "chan_kern.h"
+#include "irq_kern.h"
+#include "irq_user.h"
+#include "kern_util.h"
+#include "os.h"
+
+#define LINE_BUFSIZE 4096
+
+/*
+ * Read-side interrupt handler: data is the struct chan that was passed
+ * when the irq was requested. If the channel is attached to a line, its
+ * pending input is processed through chan_interrupt().
+ *
+ * line->tty is only read after the NULL check on line.
+ */
+static irqreturn_t line_interrupt(int irq, void *data)
+{
+	struct chan *chan = data;
+	struct line *line = chan->line;
+
+	if (line)
+		chan_interrupt(&line->chan_list, &line->task, line->tty, irq);
+	return IRQ_HANDLED;
+}
+
+static void line_timer_cb(struct work_struct *work)
+{
+	struct line *line = container_of(work, struct line, task.work);
+
+	if (!line->throttled)
+		chan_interrupt(&line->chan_list, &line->task, line->tty,
+			       line->driver->read_irq);
+}
+
+/*
+ * Returns the free space inside the ring buffer of this line.
+ *
+ * Should be called while holding line->lock (this does not modify data).
+ */
+static int write_room(struct line *line)
+{
+	int n;
+
+	if (line->buffer == NULL)
+		return LINE_BUFSIZE - 1;
+
+	/* This is for the case where the buffer is wrapped! */
+	n = line->head - line->tail;
+
+	if (n <= 0)
+		n += LINE_BUFSIZE; /* The other case */
+	return n - 1;
+}
+
+int line_write_room(struct tty_struct *tty)
+{
+	struct line *line = tty->driver_data;
+	unsigned long flags;
+	int room;
+
+	spin_lock_irqsave(&line->lock, flags);
+	room = write_room(line);
+	spin_unlock_irqrestore(&line->lock, flags);
+
+	return room;
+}
+
+int line_chars_in_buffer(struct tty_struct *tty)
+{
+	struct line *line = tty->driver_data;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&line->lock, flags);
+	/* write_room subtracts 1 for the needed NULL, so we readd it.*/
+	ret = LINE_BUFSIZE - (write_room(line) + 1);
+	spin_unlock_irqrestore(&line->lock, flags);
+
+	return ret;
+}
+
+/*
+ * This copies the content of buf into the circular buffer associated with
+ * this line.
+ * The return value is the number of characters actually copied, i.e. 
the ones + * for which there was space: this function is not supposed to ever flush out + * the circular buffer. + * + * Must be called while holding line->lock! + */ +static int buffer_data(struct line *line, const char *buf, int len) +{ + int end, room; + + if (line->buffer == NULL) { + line->buffer = kmalloc(LINE_BUFSIZE, GFP_ATOMIC); + if (line->buffer == NULL) { + printk(KERN_ERR "buffer_data - atomic allocation " + "failed\n"); + return 0; + } + line->head = line->buffer; + line->tail = line->buffer; + } + + room = write_room(line); + len = (len > room) ? room : len; + + end = line->buffer + LINE_BUFSIZE - line->tail; + + if (len < end) { + memcpy(line->tail, buf, len); + line->tail += len; + } + else { + /* The circular buffer is wrapping */ + memcpy(line->tail, buf, end); + buf += end; + memcpy(line->buffer, buf, len - end); + line->tail = line->buffer + len - end; + } + + return len; +} + +/* + * Flushes the ring buffer to the output channels. That is, write_chan is + * called, passing it line->head as buffer, and an appropriate count. + * + * On exit, returns 1 when the buffer is empty, + * 0 when the buffer is not empty on exit, + * and -errno when an error occurred. + * + * Must be called while holding line->lock!*/ +static int flush_buffer(struct line *line) +{ + int n, count; + + if ((line->buffer == NULL) || (line->head == line->tail)) + return 1; + + if (line->tail < line->head) { + /* line->buffer + LINE_BUFSIZE is the end of the buffer! */ + count = line->buffer + LINE_BUFSIZE - line->head; + + n = write_chan(&line->chan_list, line->head, count, + line->driver->write_irq); + if (n < 0) + return n; + if (n == count) { + /* + * We have flushed from ->head to buffer end, now we + * must flush only from the beginning to ->tail. 
+ */ + line->head = line->buffer; + } else { + line->head += n; + return 0; + } + } + + count = line->tail - line->head; + n = write_chan(&line->chan_list, line->head, count, + line->driver->write_irq); + + if (n < 0) + return n; + + line->head += n; + return line->head == line->tail; +} + +void line_flush_buffer(struct tty_struct *tty) +{ + struct line *line = tty->driver_data; + unsigned long flags; + int err; + + spin_lock_irqsave(&line->lock, flags); + err = flush_buffer(line); + spin_unlock_irqrestore(&line->lock, flags); +} + +/* + * We map both ->flush_chars and ->put_char (which go in pair) onto + * ->flush_buffer and ->write. Hope it's not that bad. + */ +void line_flush_chars(struct tty_struct *tty) +{ + line_flush_buffer(tty); +} + +int line_put_char(struct tty_struct *tty, unsigned char ch) +{ + return line_write(tty, &ch, sizeof(ch)); +} + +int line_write(struct tty_struct *tty, const unsigned char *buf, int len) +{ + struct line *line = tty->driver_data; + unsigned long flags; + int n, ret = 0; + + spin_lock_irqsave(&line->lock, flags); + if (line->head != line->tail) + ret = buffer_data(line, buf, len); + else { + n = write_chan(&line->chan_list, buf, len, + line->driver->write_irq); + if (n < 0) { + ret = n; + goto out_up; + } + + len -= n; + ret += n; + if (len > 0) + ret += buffer_data(line, buf + n, len); + } +out_up: + spin_unlock_irqrestore(&line->lock, flags); + return ret; +} + +void line_set_termios(struct tty_struct *tty, struct ktermios * old) +{ + /* nothing */ +} + +static const struct { + int cmd; + char *level; + char *name; +} tty_ioctls[] = { + /* don't print these, they flood the log ... 
*/ + { TCGETS, NULL, "TCGETS" }, + { TCSETS, NULL, "TCSETS" }, + { TCSETSW, NULL, "TCSETSW" }, + { TCFLSH, NULL, "TCFLSH" }, + { TCSBRK, NULL, "TCSBRK" }, + + /* general tty stuff */ + { TCSETSF, KERN_DEBUG, "TCSETSF" }, + { TCGETA, KERN_DEBUG, "TCGETA" }, + { TIOCMGET, KERN_DEBUG, "TIOCMGET" }, + { TCSBRKP, KERN_DEBUG, "TCSBRKP" }, + { TIOCMSET, KERN_DEBUG, "TIOCMSET" }, + + /* linux-specific ones */ + { TIOCLINUX, KERN_INFO, "TIOCLINUX" }, + { KDGKBMODE, KERN_INFO, "KDGKBMODE" }, + { KDGKBTYPE, KERN_INFO, "KDGKBTYPE" }, + { KDSIGACCEPT, KERN_INFO, "KDSIGACCEPT" }, +}; + +int line_ioctl(struct tty_struct *tty, struct file * file, + unsigned int cmd, unsigned long arg) +{ + int ret; + int i; + + ret = 0; + switch(cmd) { +#ifdef TIOCGETP + case TIOCGETP: + case TIOCSETP: + case TIOCSETN: +#endif +#ifdef TIOCGETC + case TIOCGETC: + case TIOCSETC: +#endif +#ifdef TIOCGLTC + case TIOCGLTC: + case TIOCSLTC: +#endif + /* Note: these are out of date as we now have TCGETS2 etc but this + whole lot should probably go away */ + case TCGETS: + case TCSETSF: + case TCSETSW: + case TCSETS: + case TCGETA: + case TCSETAF: + case TCSETAW: + case TCSETA: + case TCXONC: + case TCFLSH: + case TIOCOUTQ: + case TIOCINQ: + case TIOCGLCKTRMIOS: + case TIOCSLCKTRMIOS: + case TIOCPKT: + case TIOCGSOFTCAR: + case TIOCSSOFTCAR: + return -ENOIOCTLCMD; +#if 0 + case TCwhatever: + /* do something */ + break; +#endif + default: + for (i = 0; i < ARRAY_SIZE(tty_ioctls); i++) + if (cmd == tty_ioctls[i].cmd) + break; + if (i == ARRAY_SIZE(tty_ioctls)) { + printk(KERN_ERR "%s: %s: unknown ioctl: 0x%x\n", + __func__, tty->name, cmd); + } + ret = -ENOIOCTLCMD; + break; + } + return ret; +} + +void line_throttle(struct tty_struct *tty) +{ + struct line *line = tty->driver_data; + + deactivate_chan(&line->chan_list, line->driver->read_irq); + line->throttled = 1; +} + +void line_unthrottle(struct tty_struct *tty) +{ + struct line *line = tty->driver_data; + + line->throttled = 0; + 
chan_interrupt(&line->chan_list, &line->task, tty,
+		       line->driver->read_irq);
+
+	/*
+	 * Maybe there is enough stuff pending that calling the interrupt
+	 * throttles us again. In this case, line->throttled will be 1
+	 * again and we shouldn't turn the interrupt back on.
+	 */
+	if (!line->throttled)
+		reactivate_chan(&line->chan_list, line->driver->read_irq);
+}
+
+/*
+ * Write-side interrupt handler: tries to push the line's buffered output
+ * to its channels.
+ *
+ * Returns IRQ_NONE when flush_buffer() reports data still queued or when
+ * no tty is attached; otherwise wakes the tty and returns IRQ_HANDLED. If
+ * flush_buffer() fails, the buffer is reset to empty.
+ *
+ * line->lock is released on every path out of the locked section.
+ */
+static irqreturn_t line_write_interrupt(int irq, void *data)
+{
+	struct chan *chan = data;
+	struct line *line = chan->line;
+	struct tty_struct *tty = line->tty;
+	int err;
+
+	/*
+	 * Interrupts are disabled here because we registered the interrupt with
+	 * IRQF_DISABLED (see line_setup_irq).
+	 */
+
+	spin_lock(&line->lock);
+	err = flush_buffer(line);
+	if (err == 0) {
+		/* Data is still queued; drop the lock before bailing out */
+		spin_unlock(&line->lock);
+		return IRQ_NONE;
+	} else if (err < 0) {
+		line->head = line->buffer;
+		line->tail = line->buffer;
+	}
+	spin_unlock(&line->lock);
+
+	if (tty == NULL)
+		return IRQ_NONE;
+
+	tty_wakeup(tty);
+	return IRQ_HANDLED;
+}
+
+int line_setup_irq(int fd, int input, int output, struct line *line, void *data)
+{
+	const struct line_driver *driver = line->driver;
+	int err = 0, flags = IRQF_DISABLED | IRQF_SHARED | IRQF_SAMPLE_RANDOM;
+
+	if (input)
+		err = um_request_irq(driver->read_irq, fd, IRQ_READ,
+				     line_interrupt, flags,
+				     driver->read_irq_name, data);
+	if (err)
+		return err;
+	if (output)
+		err = um_request_irq(driver->write_irq, fd, IRQ_WRITE,
+				     line_write_interrupt, flags,
+				     driver->write_irq_name, data);
+	line->have_irq = 1;
+	return err;
+}
+
+/*
+ * Normally, a driver like this can rely mostly on the tty layer
+ * locking, particularly when it comes to the driver structure.
+ * However, in this case, mconsole requests can come in "from the
+ * side", and race with opens and closes.
+ *
+ * mconsole config requests will want to be sure the device isn't in
+ * use, and get_config, open, and close will want a stable
+ * configuration. The checking and modification of the configuration
+ * is done under a spinlock. 
Checking whether the device is in use is + * line->tty->count > 1, also under the spinlock. + * + * tty->count serves to decide whether the device should be enabled or + * disabled on the host. If it's equal to 1, then we are doing the + * first open or last close. Otherwise, open and close just return. + */ + +int line_open(struct line *lines, struct tty_struct *tty) +{ + struct line *line = &lines[tty->index]; + int err = -ENODEV; + + spin_lock(&line->count_lock); + if (!line->valid) + goto out_unlock; + + err = 0; + if (tty->count > 1) + goto out_unlock; + + spin_unlock(&line->count_lock); + + tty->driver_data = line; + line->tty = tty; + + err = enable_chan(line); + if (err) + return err; + + INIT_DELAYED_WORK(&line->task, line_timer_cb); + + if (!line->sigio) { + chan_enable_winch(&line->chan_list, tty); + line->sigio = 1; + } + + chan_window_size(&line->chan_list, &tty->winsize.ws_row, + &tty->winsize.ws_col); + + return err; + +out_unlock: + spin_unlock(&line->count_lock); + return err; +} + +static void unregister_winch(struct tty_struct *tty); + +void line_close(struct tty_struct *tty, struct file * filp) +{ + struct line *line = tty->driver_data; + + /* + * If line_open fails (and tty->driver_data is never set), + * tty_open will call line_close. So just return in this case. + */ + if (line == NULL) + return; + + /* We ignore the error anyway! 
*/ + flush_buffer(line); + + spin_lock(&line->count_lock); + if (!line->valid) + goto out_unlock; + + if (tty->count > 1) + goto out_unlock; + + spin_unlock(&line->count_lock); + + line->tty = NULL; + tty->driver_data = NULL; + + if (line->sigio) { + unregister_winch(tty); + line->sigio = 0; + } + + return; + +out_unlock: + spin_unlock(&line->count_lock); +} + +void close_lines(struct line *lines, int nlines) +{ + int i; + + for(i = 0; i < nlines; i++) + close_chan(&lines[i].chan_list, 0); +} + +static int setup_one_line(struct line *lines, int n, char *init, int init_prio, + char **error_out) +{ + struct line *line = &lines[n]; + int err = -EINVAL; + + spin_lock(&line->count_lock); + + if (line->tty != NULL) { + *error_out = "Device is already open"; + goto out; + } + + if (line->init_pri <= init_prio) { + line->init_pri = init_prio; + if (!strcmp(init, "none")) + line->valid = 0; + else { + line->init_str = init; + line->valid = 1; + } + } + err = 0; +out: + spin_unlock(&line->count_lock); + return err; +} + +/* + * Common setup code for both startup command line and mconsole initialization. + * @lines contains the array (of size @num) to modify; + * @init is the setup string; + * @error_out is an error string in the case of failure; + */ + +int line_setup(struct line *lines, unsigned int num, char *init, + char **error_out) +{ + int i, n, err; + char *end; + + if (*init == '=') { + /* + * We said con=/ssl= instead of con#=, so we are configuring all + * consoles at once. 
+ */ + n = -1; + } + else { + n = simple_strtoul(init, &end, 0); + if (*end != '=') { + *error_out = "Couldn't parse device number"; + return -EINVAL; + } + init = end; + } + init++; + + if (n >= (signed int) num) { + *error_out = "Device number out of range"; + return -EINVAL; + } + else if (n >= 0) { + err = setup_one_line(lines, n, init, INIT_ONE, error_out); + if (err) + return err; + } + else { + for(i = 0; i < num; i++) { + err = setup_one_line(lines, i, init, INIT_ALL, + error_out); + if (err) + return err; + } + } + return n == -1 ? num : n; +} + +int line_config(struct line *lines, unsigned int num, char *str, + const struct chan_opts *opts, char **error_out) +{ + struct line *line; + char *new; + int n; + + if (*str == '=') { + *error_out = "Can't configure all devices from mconsole"; + return -EINVAL; + } + + new = kstrdup(str, GFP_KERNEL); + if (new == NULL) { + *error_out = "Failed to allocate memory"; + return -ENOMEM; + } + n = line_setup(lines, num, new, error_out); + if (n < 0) + return n; + + line = &lines[n]; + return parse_chan_pair(line->init_str, line, n, opts, error_out); +} + +int line_get_config(char *name, struct line *lines, unsigned int num, char *str, + int size, char **error_out) +{ + struct line *line; + char *end; + int dev, n = 0; + + dev = simple_strtoul(name, &end, 0); + if ((*end != '\0') || (end == name)) { + *error_out = "line_get_config failed to parse device number"; + return 0; + } + + if ((dev < 0) || (dev >= num)) { + *error_out = "device number out of range"; + return 0; + } + + line = &lines[dev]; + + spin_lock(&line->count_lock); + if (!line->valid) + CONFIG_CHUNK(str, size, n, "none", 1); + else if (line->tty == NULL) + CONFIG_CHUNK(str, size, n, line->init_str, 1); + else n = chan_config_string(&line->chan_list, str, size, error_out); + spin_unlock(&line->count_lock); + + return n; +} + +int line_id(char **str, int *start_out, int *end_out) +{ + char *end; + int n; + + n = simple_strtoul(*str, &end, 0); + if ((*end != 
'\0') || (end == *str)) + return -1; + + *str = end; + *start_out = n; + *end_out = n; + return n; +} + +int line_remove(struct line *lines, unsigned int num, int n, char **error_out) +{ + int err; + char config[sizeof("conxxxx=none\0")]; + + sprintf(config, "%d=none", n); + err = line_setup(lines, num, config, error_out); + if (err >= 0) + err = 0; + return err; +} + +struct tty_driver *register_lines(struct line_driver *line_driver, + const struct tty_operations *ops, + struct line *lines, int nlines) +{ + int i; + struct tty_driver *driver = alloc_tty_driver(nlines); + + if (!driver) + return NULL; + + driver->driver_name = line_driver->name; + driver->name = line_driver->device_name; + driver->major = line_driver->major; + driver->minor_start = line_driver->minor_start; + driver->type = line_driver->type; + driver->subtype = line_driver->subtype; + driver->flags = TTY_DRIVER_REAL_RAW; + driver->init_termios = tty_std_termios; + tty_set_operations(driver, ops); + + if (tty_register_driver(driver)) { + printk(KERN_ERR "register_lines : can't register %s driver\n", + line_driver->name); + put_tty_driver(driver); + return NULL; + } + + for(i = 0; i < nlines; i++) { + if (!lines[i].valid) + tty_unregister_device(driver, i); + } + + mconsole_register_dev(&line_driver->mc); + return driver; +} + +static DEFINE_SPINLOCK(winch_handler_lock); +static LIST_HEAD(winch_handlers); + +void lines_init(struct line *lines, int nlines, struct chan_opts *opts) +{ + struct line *line; + char *error; + int i; + + for(i = 0; i < nlines; i++) { + line = &lines[i]; + INIT_LIST_HEAD(&line->chan_list); + + if (line->init_str == NULL) + continue; + + line->init_str = kstrdup(line->init_str, GFP_KERNEL); + if (line->init_str == NULL) + printk(KERN_ERR "lines_init - kstrdup returned NULL\n"); + + if (parse_chan_pair(line->init_str, line, i, opts, &error)) { + printk(KERN_ERR "parse_chan_pair failed for " + "device %d : %s\n", i, error); + line->valid = 0; + } + } +} + +struct winch { + 
struct list_head list; + int fd; + int tty_fd; + int pid; + struct tty_struct *tty; + unsigned long stack; +}; + +static void free_winch(struct winch *winch, int free_irq_ok) +{ + list_del(&winch->list); + + if (winch->pid != -1) + os_kill_process(winch->pid, 1); + if (winch->fd != -1) + os_close_file(winch->fd); + if (winch->stack != 0) + free_stack(winch->stack, 0); + if (free_irq_ok) + free_irq(WINCH_IRQ, winch); + kfree(winch); +} + +static irqreturn_t winch_interrupt(int irq, void *data) +{ + struct winch *winch = data; + struct tty_struct *tty; + struct line *line; + int err; + char c; + + if (winch->fd != -1) { + err = generic_read(winch->fd, &c, NULL); + if (err < 0) { + if (err != -EAGAIN) { + printk(KERN_ERR "winch_interrupt : " + "read failed, errno = %d\n", -err); + printk(KERN_ERR "fd %d is losing SIGWINCH " + "support\n", winch->tty_fd); + free_winch(winch, 0); + return IRQ_HANDLED; + } + goto out; + } + } + tty = winch->tty; + if (tty != NULL) { + line = tty->driver_data; + if (line != NULL) { + chan_window_size(&line->chan_list, &tty->winsize.ws_row, + &tty->winsize.ws_col); + kill_pgrp(tty->pgrp, SIGWINCH, 1); + } + } + out: + if (winch->fd != -1) + reactivate_fd(winch->fd, WINCH_IRQ); + return IRQ_HANDLED; +} + +void register_winch_irq(int fd, int tty_fd, int pid, struct tty_struct *tty, + unsigned long stack) +{ + struct winch *winch; + + winch = kmalloc(sizeof(*winch), GFP_KERNEL); + if (winch == NULL) { + printk(KERN_ERR "register_winch_irq - kmalloc failed\n"); + goto cleanup; + } + + *winch = ((struct winch) { .list = LIST_HEAD_INIT(winch->list), + .fd = fd, + .tty_fd = tty_fd, + .pid = pid, + .tty = tty, + .stack = stack }); + + if (um_request_irq(WINCH_IRQ, fd, IRQ_READ, winch_interrupt, + IRQF_DISABLED | IRQF_SHARED | IRQF_SAMPLE_RANDOM, + "winch", winch) < 0) { + printk(KERN_ERR "register_winch_irq - failed to register " + "IRQ\n"); + goto out_free; + } + + spin_lock(&winch_handler_lock); + list_add(&winch->list, &winch_handlers); + 
spin_unlock(&winch_handler_lock); + + return; + + out_free: + kfree(winch); + cleanup: + os_kill_process(pid, 1); + os_close_file(fd); + if (stack != 0) + free_stack(stack, 0); +} + +static void unregister_winch(struct tty_struct *tty) +{ + struct list_head *ele; + struct winch *winch; + + spin_lock(&winch_handler_lock); + + list_for_each(ele, &winch_handlers) { + winch = list_entry(ele, struct winch, list); + if (winch->tty == tty) { + free_winch(winch, 1); + break; + } + } + spin_unlock(&winch_handler_lock); +} + +static void winch_cleanup(void) +{ + struct list_head *ele, *next; + struct winch *winch; + + spin_lock(&winch_handler_lock); + + list_for_each_safe(ele, next, &winch_handlers) { + winch = list_entry(ele, struct winch, list); + free_winch(winch, 1); + } + + spin_unlock(&winch_handler_lock); +} +__uml_exitcall(winch_cleanup); + +char *add_xterm_umid(char *base) +{ + char *umid, *title; + int len; + + umid = get_umid(); + if (*umid == '\0') + return base; + + len = strlen(base) + strlen(" ()") + strlen(umid) + 1; + title = kmalloc(len, GFP_KERNEL); + if (title == NULL) { + printk(KERN_ERR "Failed to allocate buffer for xterm title\n"); + return base; + } + + snprintf(title, len, "%s (%s)", base, umid); + return title; +} diff --git a/arch/um/drivers/mcast.h b/arch/um/drivers/mcast.h new file mode 100644 index 0000000..6fa282e --- /dev/null +++ b/arch/um/drivers/mcast.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __DRIVERS_MCAST_H +#define __DRIVERS_MCAST_H + +#include "net_user.h" + +struct mcast_data { + char *addr; + unsigned short port; + void *mcast_addr; + int ttl; + void *dev; +}; + +extern const struct net_user_info mcast_user_info; + +extern int mcast_user_write(int fd, void *buf, int len, + struct mcast_data *pri); + +#endif diff --git a/arch/um/drivers/mcast_kern.c b/arch/um/drivers/mcast_kern.c new file mode 100644 index 0000000..8c4378a --- /dev/null 
+++ b/arch/um/drivers/mcast_kern.c @@ -0,0 +1,120 @@ +/* + * user-mode-linux networking multicast transport + * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org> + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * + * based on the existing uml-networking code, which is + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 by various other people who didn't put their name here. + * + * Licensed under the GPL. + */ + +#include "linux/init.h" +#include <linux/netdevice.h> +#include "mcast.h" +#include "net_kern.h" + +struct mcast_init { + char *addr; + int port; + int ttl; +}; + +static void mcast_init(struct net_device *dev, void *data) +{ + struct uml_net_private *pri; + struct mcast_data *dpri; + struct mcast_init *init = data; + + pri = dev->priv; + dpri = (struct mcast_data *) pri->user; + dpri->addr = init->addr; + dpri->port = init->port; + dpri->ttl = init->ttl; + dpri->dev = dev; + + printk("mcast backend multicast address: %s:%u, TTL:%u\n", + dpri->addr, dpri->port, dpri->ttl); +} + +static int mcast_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return net_recvfrom(fd, skb_mac_header(skb), + skb->dev->mtu + ETH_HEADER_OTHER); +} + +static int mcast_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return mcast_user_write(fd, skb->data, skb->len, + (struct mcast_data *) &lp->user); +} + +static const struct net_kern_info mcast_kern_info = { + .init = mcast_init, + .protocol = eth_protocol, + .read = mcast_read, + .write = mcast_write, +}; + +static int mcast_setup(char *str, char **mac_out, void *data) +{ + struct mcast_init *init = data; + char *port_str = NULL, *ttl_str = NULL, *remain; + char *last; + + *init = ((struct mcast_init) + { .addr = "239.192.168.1", + .port = 1102, + .ttl = 1 }); + + remain = split_if_spec(str, mac_out, &init->addr, &port_str, &ttl_str, + NULL); + if (remain != NULL) { + printk(KERN_ERR 
"mcast_setup - Extra garbage on " + "specification : '%s'\n", remain); + return 0; + } + + if (port_str != NULL) { + init->port = simple_strtoul(port_str, &last, 10); + if ((*last != '\0') || (last == port_str)) { + printk(KERN_ERR "mcast_setup - Bad port : '%s'\n", + port_str); + return 0; + } + } + + if (ttl_str != NULL) { + init->ttl = simple_strtoul(ttl_str, &last, 10); + if ((*last != '\0') || (last == ttl_str)) { + printk(KERN_ERR "mcast_setup - Bad ttl : '%s'\n", + ttl_str); + return 0; + } + } + + printk(KERN_INFO "Configured mcast device: %s:%u-%u\n", init->addr, + init->port, init->ttl); + + return 1; +} + +static struct transport mcast_transport = { + .list = LIST_HEAD_INIT(mcast_transport.list), + .name = "mcast", + .setup = mcast_setup, + .user = &mcast_user_info, + .kern = &mcast_kern_info, + .private_size = sizeof(struct mcast_data), + .setup_size = sizeof(struct mcast_init), +}; + +static int register_mcast(void) +{ + register_transport(&mcast_transport); + return 0; +} + +late_initcall(register_mcast); diff --git a/arch/um/drivers/mcast_user.c b/arch/um/drivers/mcast_user.c new file mode 100644 index 0000000..ee19e91 --- /dev/null +++ b/arch/um/drivers/mcast_user.c @@ -0,0 +1,165 @@ +/* + * user-mode-linux networking multicast transport + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org> + * + * based on the existing uml-networking code, which is + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 by various other people who didn't put their name here. + * + * Licensed under the GPL. 
+ * + */ + +#include <unistd.h> +#include <errno.h> +#include <netinet/in.h> +#include "kern_constants.h" +#include "mcast.h" +#include "net_user.h" +#include "um_malloc.h" +#include "user.h" + +static struct sockaddr_in *new_addr(char *addr, unsigned short port) +{ + struct sockaddr_in *sin; + + sin = uml_kmalloc(sizeof(struct sockaddr_in), UM_GFP_KERNEL); + if (sin == NULL) { + printk(UM_KERN_ERR "new_addr: allocation of sockaddr_in " + "failed\n"); + return NULL; + } + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = in_aton(addr); + sin->sin_port = htons(port); + return sin; +} + +static int mcast_user_init(void *data, void *dev) +{ + struct mcast_data *pri = data; + + pri->mcast_addr = new_addr(pri->addr, pri->port); + pri->dev = dev; + return 0; +} + +static void mcast_remove(void *data) +{ + struct mcast_data *pri = data; + + kfree(pri->mcast_addr); + pri->mcast_addr = NULL; +} + +static int mcast_open(void *data) +{ + struct mcast_data *pri = data; + struct sockaddr_in *sin = pri->mcast_addr; + struct ip_mreq mreq; + int fd, yes = 1, err = -EINVAL; + + + if ((sin->sin_addr.s_addr == 0) || (sin->sin_port == 0)) + goto out; + + fd = socket(AF_INET, SOCK_DGRAM, 0); + + if (fd < 0) { + err = -errno; + printk(UM_KERN_ERR "mcast_open : data socket failed, " + "errno = %d\n", errno); + goto out; + } + + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) { + err = -errno; + printk(UM_KERN_ERR "mcast_open: SO_REUSEADDR failed, " + "errno = %d\n", errno); + goto out_close; + } + + /* set ttl according to config */ + if (setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &pri->ttl, + sizeof(pri->ttl)) < 0) { + err = -errno; + printk(UM_KERN_ERR "mcast_open: IP_MULTICAST_TTL failed, " + "error = %d\n", errno); + goto out_close; + } + + /* set LOOP, so data does get fed back to local sockets */ + if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &yes, sizeof(yes)) < 0) { + err = -errno; + printk(UM_KERN_ERR "mcast_open: IP_MULTICAST_LOOP failed, " + "error = 
%d\n", errno); + goto out_close; + } + + /* bind socket to mcast address */ + if (bind(fd, (struct sockaddr *) sin, sizeof(*sin)) < 0) { + err = -errno; + printk(UM_KERN_ERR "mcast_open : data bind failed, " + "errno = %d\n", errno); + goto out_close; + } + + /* subscribe to the multicast group */ + mreq.imr_multiaddr.s_addr = sin->sin_addr.s_addr; + mreq.imr_interface.s_addr = 0; + if (setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP, + &mreq, sizeof(mreq)) < 0) { + err = -errno; + printk(UM_KERN_ERR "mcast_open: IP_ADD_MEMBERSHIP failed, " + "error = %d\n", errno); + printk(UM_KERN_ERR "There appears not to be a multicast-" + "capable network interface on the host.\n"); + printk(UM_KERN_ERR "eth0 should be configured in order to use " + "the multicast transport.\n"); + goto out_close; + } + + return fd; + + out_close: + close(fd); + out: + return err; +} + +static void mcast_close(int fd, void *data) +{ + struct ip_mreq mreq; + struct mcast_data *pri = data; + struct sockaddr_in *sin = pri->mcast_addr; + + mreq.imr_multiaddr.s_addr = sin->sin_addr.s_addr; + mreq.imr_interface.s_addr = 0; + if (setsockopt(fd, SOL_IP, IP_DROP_MEMBERSHIP, + &mreq, sizeof(mreq)) < 0) { + printk(UM_KERN_ERR "mcast_open: IP_DROP_MEMBERSHIP failed, " + "error = %d\n", errno); + } + + close(fd); +} + +int mcast_user_write(int fd, void *buf, int len, struct mcast_data *pri) +{ + struct sockaddr_in *data_addr = pri->mcast_addr; + + return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr)); +} + +const struct net_user_info mcast_user_info = { + .init = mcast_user_init, + .open = mcast_open, + .close = mcast_close, + .remove = mcast_remove, + .add_address = NULL, + .delete_address = NULL, + .mtu = ETH_MAX_PACKET, + .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, +}; diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c new file mode 100644 index 0000000..8f44ebb --- /dev/null +++ b/arch/um/drivers/mconsole_kern.c @@ -0,0 +1,945 @@ +/* + * Copyright (C) 2001 Lennert 
Buytenhek (buytenh@gnu.org) + * Copyright (C) 2001 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/console.h> +#include <linux/ctype.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/notifier.h> +#include <linux/reboot.h> +#include <linux/proc_fs.h> +#include <linux/slab.h> +#include <linux/syscalls.h> +#include <linux/utsname.h> +#include <linux/socket.h> +#include <linux/un.h> +#include <linux/workqueue.h> +#include <linux/mutex.h> +#include <asm/uaccess.h> + +#include "init.h" +#include "irq_kern.h" +#include "irq_user.h" +#include "kern_util.h" +#include "mconsole.h" +#include "mconsole_kern.h" +#include "os.h" + +static int do_unlink_socket(struct notifier_block *notifier, + unsigned long what, void *data) +{ + return mconsole_unlink_socket(); +} + + +static struct notifier_block reboot_notifier = { + .notifier_call = do_unlink_socket, + .priority = 0, +}; + +/* Safe without explicit locking for now. Tasklets provide their own + * locking, and the interrupt handler is safe because it can't interrupt + * itself and it can only happen on CPU 0. 
+ */ + +static LIST_HEAD(mc_requests); + +static void mc_work_proc(struct work_struct *unused) +{ + struct mconsole_entry *req; + unsigned long flags; + + while (!list_empty(&mc_requests)) { + local_irq_save(flags); + req = list_entry(mc_requests.next, struct mconsole_entry, list); + list_del(&req->list); + local_irq_restore(flags); + req->request.cmd->handler(&req->request); + kfree(req); + } +} + +static DECLARE_WORK(mconsole_work, mc_work_proc); + +static irqreturn_t mconsole_interrupt(int irq, void *dev_id) +{ + /* long to avoid size mismatch warnings from gcc */ + long fd; + struct mconsole_entry *new; + static struct mc_request req; /* that's OK */ + + fd = (long) dev_id; + while (mconsole_get_request(fd, &req)) { + if (req.cmd->context == MCONSOLE_INTR) + (*req.cmd->handler)(&req); + else { + new = kmalloc(sizeof(*new), GFP_NOWAIT); + if (new == NULL) + mconsole_reply(&req, "Out of memory", 1, 0); + else { + new->request = req; + new->request.regs = get_irq_regs()->regs; + list_add(&new->list, &mc_requests); + } + } + } + if (!list_empty(&mc_requests)) + schedule_work(&mconsole_work); + reactivate_fd(fd, MCONSOLE_IRQ); + return IRQ_HANDLED; +} + +void mconsole_version(struct mc_request *req) +{ + char version[256]; + + sprintf(version, "%s %s %s %s %s", utsname()->sysname, + utsname()->nodename, utsname()->release, utsname()->version, + utsname()->machine); + mconsole_reply(req, version, 0, 0); +} + +void mconsole_log(struct mc_request *req) +{ + int len; + char *ptr = req->request.data; + + ptr += strlen("log "); + + len = req->len - (ptr - req->request.data); + printk(KERN_WARNING "%.*s", len, ptr); + mconsole_reply(req, "", 0, 0); +} + +/* This is a more convoluted version of mconsole_proc, which has some stability + * problems; however, we need it fixed, because it is expected that UML users + * mount HPPFS instead of procfs on /proc. 
And we want mconsole_proc to still + * show the real procfs content, not the ones from hppfs.*/ +#if 0 +void mconsole_proc(struct mc_request *req) +{ + struct nameidata nd; + struct file_system_type *proc; + struct super_block *super; + struct file *file; + int n, err; + char *ptr = req->request.data, *buf; + + ptr += strlen("proc"); + while (isspace(*ptr)) ptr++; + + proc = get_fs_type("proc"); + if (proc == NULL) { + mconsole_reply(req, "procfs not registered", 1, 0); + goto out; + } + + super = (*proc->get_sb)(proc, 0, NULL, NULL); + put_filesystem(proc); + if (super == NULL) { + mconsole_reply(req, "Failed to get procfs superblock", 1, 0); + goto out; + } + up_write(&super->s_umount); + + nd.path.dentry = super->s_root; + nd.path.mnt = NULL; + nd.flags = O_RDONLY + 1; + nd.last_type = LAST_ROOT; + + /* START: it was experienced that the stability problems are closed + * if commenting out these two calls + the below read cycle. To + * make UML crash again, it was enough to readd either one.*/ + err = link_path_walk(ptr, &nd); + if (err) { + mconsole_reply(req, "Failed to look up file", 1, 0); + goto out_kill; + } + + file = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY); + if (IS_ERR(file)) { + mconsole_reply(req, "Failed to open file", 1, 0); + goto out_kill; + } + /*END*/ + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (buf == NULL) { + mconsole_reply(req, "Failed to allocate buffer", 1, 0); + goto out_fput; + } + + if ((file->f_op != NULL) && (file->f_op->read != NULL)) { + do { + n = (*file->f_op->read)(file, buf, PAGE_SIZE - 1, + &file->f_pos); + if (n >= 0) { + buf[n] = '\0'; + mconsole_reply(req, buf, 0, (n > 0)); + } + else { + mconsole_reply(req, "Read of file failed", + 1, 0); + goto out_free; + } + } while (n > 0); + } + else mconsole_reply(req, "", 0, 0); + + out_free: + kfree(buf); + out_fput: + fput(file); + out_kill: + deactivate_super(super); + out: ; +} +#endif + +void mconsole_proc(struct mc_request *req) +{ + char path[64]; + char *buf; + 
int len; + int fd; + int first_chunk = 1; + char *ptr = req->request.data; + + ptr += strlen("proc"); + while (isspace(*ptr)) + ptr++; + snprintf(path, sizeof(path), "/proc/%s", ptr); + + fd = sys_open(path, 0, 0); + if (fd < 0) { + mconsole_reply(req, "Failed to open file", 1, 0); + printk(KERN_ERR "open %s: %d\n",path,fd); + goto out; + } + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (buf == NULL) { + mconsole_reply(req, "Failed to allocate buffer", 1, 0); + goto out_close; + } + + for (;;) { + len = sys_read(fd, buf, PAGE_SIZE-1); + if (len < 0) { + mconsole_reply(req, "Read of file failed", 1, 0); + goto out_free; + } + /* Begin the file content on his own line. */ + if (first_chunk) { + mconsole_reply(req, "\n", 0, 1); + first_chunk = 0; + } + if (len == PAGE_SIZE-1) { + buf[len] = '\0'; + mconsole_reply(req, buf, 0, 1); + } else { + buf[len] = '\0'; + mconsole_reply(req, buf, 0, 0); + break; + } + } + + out_free: + kfree(buf); + out_close: + sys_close(fd); + out: + /* nothing */; +} + +#define UML_MCONSOLE_HELPTEXT \ +"Commands: \n\ + version - Get kernel version \n\ + help - Print this message \n\ + halt - Halt UML \n\ + reboot - Reboot UML \n\ + config <dev>=<config> - Add a new device to UML; \n\ + same syntax as command line \n\ + config <dev> - Query the configuration of a device \n\ + remove <dev> - Remove a device from UML \n\ + sysrq <letter> - Performs the SysRq action controlled by the letter \n\ + cad - invoke the Ctrl-Alt-Del handler \n\ + stop - pause the UML; it will do nothing until it receives a 'go' \n\ + go - continue the UML after a 'stop' \n\ + log <string> - make UML enter <string> into the kernel log\n\ + proc <file> - returns the contents of the UML's /proc/<file>\n\ + stack <pid> - returns the stack of the specified pid\n\ +" + +void mconsole_help(struct mc_request *req) +{ + mconsole_reply(req, UML_MCONSOLE_HELPTEXT, 0, 0); +} + +void mconsole_halt(struct mc_request *req) +{ + mconsole_reply(req, "", 0, 0); + machine_halt(); +} + 
+void mconsole_reboot(struct mc_request *req) +{ + mconsole_reply(req, "", 0, 0); + machine_restart(NULL); +} + +void mconsole_cad(struct mc_request *req) +{ + mconsole_reply(req, "", 0, 0); + ctrl_alt_del(); +} + +void mconsole_go(struct mc_request *req) +{ + mconsole_reply(req, "Not stopped", 1, 0); +} + +void mconsole_stop(struct mc_request *req) +{ + deactivate_fd(req->originating_fd, MCONSOLE_IRQ); + os_set_fd_block(req->originating_fd, 1); + mconsole_reply(req, "stopped", 0, 0); + for (;;) { + if (!mconsole_get_request(req->originating_fd, req)) + continue; + if (req->cmd->handler == mconsole_go) + break; + if (req->cmd->handler == mconsole_stop) { + mconsole_reply(req, "Already stopped", 1, 0); + continue; + } + if (req->cmd->handler == mconsole_sysrq) { + struct pt_regs *old_regs; + old_regs = set_irq_regs((struct pt_regs *)&req->regs); + mconsole_sysrq(req); + set_irq_regs(old_regs); + continue; + } + (*req->cmd->handler)(req); + } + os_set_fd_block(req->originating_fd, 0); + reactivate_fd(req->originating_fd, MCONSOLE_IRQ); + mconsole_reply(req, "", 0, 0); +} + +static DEFINE_SPINLOCK(mc_devices_lock); +static LIST_HEAD(mconsole_devices); + +void mconsole_register_dev(struct mc_device *new) +{ + spin_lock(&mc_devices_lock); + BUG_ON(!list_empty(&new->list)); + list_add(&new->list, &mconsole_devices); + spin_unlock(&mc_devices_lock); +} + +static struct mc_device *mconsole_find_dev(char *name) +{ + struct list_head *ele; + struct mc_device *dev; + + list_for_each(ele, &mconsole_devices) { + dev = list_entry(ele, struct mc_device, list); + if (!strncmp(name, dev->name, strlen(dev->name))) + return dev; + } + return NULL; +} + +#define UNPLUGGED_PER_PAGE \ + ((PAGE_SIZE - sizeof(struct list_head)) / sizeof(unsigned long)) + +struct unplugged_pages { + struct list_head list; + void *pages[UNPLUGGED_PER_PAGE]; +}; + +static DEFINE_MUTEX(plug_mem_mutex); +static unsigned long long unplugged_pages_count = 0; +static LIST_HEAD(unplugged_pages); +static int 
unplug_index = UNPLUGGED_PER_PAGE; + +static int mem_config(char *str, char **error_out) +{ + unsigned long long diff; + int err = -EINVAL, i, add; + char *ret; + + if (str[0] != '=') { + *error_out = "Expected '=' after 'mem'"; + goto out; + } + + str++; + if (str[0] == '-') + add = 0; + else if (str[0] == '+') { + add = 1; + } + else { + *error_out = "Expected increment to start with '-' or '+'"; + goto out; + } + + str++; + diff = memparse(str, &ret); + if (*ret != '\0') { + *error_out = "Failed to parse memory increment"; + goto out; + } + + diff /= PAGE_SIZE; + + mutex_lock(&plug_mem_mutex); + for (i = 0; i < diff; i++) { + struct unplugged_pages *unplugged; + void *addr; + + if (add) { + if (list_empty(&unplugged_pages)) + break; + + unplugged = list_entry(unplugged_pages.next, + struct unplugged_pages, list); + if (unplug_index > 0) + addr = unplugged->pages[--unplug_index]; + else { + list_del(&unplugged->list); + addr = unplugged; + unplug_index = UNPLUGGED_PER_PAGE; + } + + free_page((unsigned long) addr); + unplugged_pages_count--; + } + else { + struct page *page; + + page = alloc_page(GFP_ATOMIC); + if (page == NULL) + break; + + unplugged = page_address(page); + if (unplug_index == UNPLUGGED_PER_PAGE) { + list_add(&unplugged->list, &unplugged_pages); + unplug_index = 0; + } + else { + struct list_head *entry = unplugged_pages.next; + addr = unplugged; + + unplugged = list_entry(entry, + struct unplugged_pages, + list); + err = os_drop_memory(addr, PAGE_SIZE); + if (err) { + printk(KERN_ERR "Failed to release " + "memory - errno = %d\n", err); + *error_out = "Failed to release memory"; + goto out_unlock; + } + unplugged->pages[unplug_index++] = addr; + } + + unplugged_pages_count++; + } + } + + err = 0; +out_unlock: + mutex_unlock(&plug_mem_mutex); +out: + return err; +} + +static int mem_get_config(char *name, char *str, int size, char **error_out) +{ + char buf[sizeof("18446744073709551615")]; + int len = 0; + + sprintf(buf, "%ld", uml_physmem); + 
CONFIG_CHUNK(str, size, len, buf, 1); + + return len; +} + +static int mem_id(char **str, int *start_out, int *end_out) +{ + *start_out = 0; + *end_out = 0; + + return 0; +} + +static int mem_remove(int n, char **error_out) +{ + *error_out = "Memory doesn't support the remove operation"; + return -EBUSY; +} + +static struct mc_device mem_mc = { + .list = LIST_HEAD_INIT(mem_mc.list), + .name = "mem", + .config = mem_config, + .get_config = mem_get_config, + .id = mem_id, + .remove = mem_remove, +}; + +static int __init mem_mc_init(void) +{ + if (can_drop_memory()) + mconsole_register_dev(&mem_mc); + else printk(KERN_ERR "Can't release memory to the host - memory " + "hotplug won't be supported\n"); + return 0; +} + +__initcall(mem_mc_init); + +#define CONFIG_BUF_SIZE 64 + +static void mconsole_get_config(int (*get_config)(char *, char *, int, + char **), + struct mc_request *req, char *name) +{ + char default_buf[CONFIG_BUF_SIZE], *error, *buf; + int n, size; + + if (get_config == NULL) { + mconsole_reply(req, "No get_config routine defined", 1, 0); + return; + } + + error = NULL; + size = ARRAY_SIZE(default_buf); + buf = default_buf; + + while (1) { + n = (*get_config)(name, buf, size, &error); + if (error != NULL) { + mconsole_reply(req, error, 1, 0); + goto out; + } + + if (n <= size) { + mconsole_reply(req, buf, 0, 0); + goto out; + } + + if (buf != default_buf) + kfree(buf); + + size = n; + buf = kmalloc(size, GFP_KERNEL); + if (buf == NULL) { + mconsole_reply(req, "Failed to allocate buffer", 1, 0); + return; + } + } + out: + if (buf != default_buf) + kfree(buf); +} + +void mconsole_config(struct mc_request *req) +{ + struct mc_device *dev; + char *ptr = req->request.data, *name, *error_string = ""; + int err; + + ptr += strlen("config"); + while (isspace(*ptr)) + ptr++; + dev = mconsole_find_dev(ptr); + if (dev == NULL) { + mconsole_reply(req, "Bad configuration option", 1, 0); + return; + } + + name = &ptr[strlen(dev->name)]; + ptr = name; + while ((*ptr != 
'=') && (*ptr != '\0')) + ptr++; + + if (*ptr == '=') { + err = (*dev->config)(name, &error_string); + mconsole_reply(req, error_string, err, 0); + } + else mconsole_get_config(dev->get_config, req, name); +} + +void mconsole_remove(struct mc_request *req) +{ + struct mc_device *dev; + char *ptr = req->request.data, *err_msg = ""; + char error[256]; + int err, start, end, n; + + ptr += strlen("remove"); + while (isspace(*ptr)) ptr++; + dev = mconsole_find_dev(ptr); + if (dev == NULL) { + mconsole_reply(req, "Bad remove option", 1, 0); + return; + } + + ptr = &ptr[strlen(dev->name)]; + + err = 1; + n = (*dev->id)(&ptr, &start, &end); + if (n < 0) { + err_msg = "Couldn't parse device number"; + goto out; + } + else if ((n < start) || (n > end)) { + sprintf(error, "Invalid device number - must be between " + "%d and %d", start, end); + err_msg = error; + goto out; + } + + err_msg = NULL; + err = (*dev->remove)(n, &err_msg); + switch(err) { + case 0: + err_msg = ""; + break; + case -ENODEV: + if (err_msg == NULL) + err_msg = "Device doesn't exist"; + break; + case -EBUSY: + if (err_msg == NULL) + err_msg = "Device is currently open"; + break; + default: + break; + } +out: + mconsole_reply(req, err_msg, err, 0); +} + +struct mconsole_output { + struct list_head list; + struct mc_request *req; +}; + +static DEFINE_SPINLOCK(client_lock); +static LIST_HEAD(clients); +static char console_buf[MCONSOLE_MAX_DATA]; + +static void console_write(struct console *console, const char *string, + unsigned int len) +{ + struct list_head *ele; + int n; + + if (list_empty(&clients)) + return; + + while (len > 0) { + n = min((size_t) len, ARRAY_SIZE(console_buf)); + strncpy(console_buf, string, n); + string += n; + len -= n; + + list_for_each(ele, &clients) { + struct mconsole_output *entry; + + entry = list_entry(ele, struct mconsole_output, list); + mconsole_reply_len(entry->req, console_buf, n, 0, 1); + } + } +} + +static struct console mc_console = { .name = "mc", + .write = 
console_write, + .flags = CON_ENABLED, + .index = -1 }; + +static int mc_add_console(void) +{ + register_console(&mc_console); + return 0; +} + +late_initcall(mc_add_console); + +static void with_console(struct mc_request *req, void (*proc)(void *), + void *arg) +{ + struct mconsole_output entry; + unsigned long flags; + + entry.req = req; + spin_lock_irqsave(&client_lock, flags); + list_add(&entry.list, &clients); + spin_unlock_irqrestore(&client_lock, flags); + + (*proc)(arg); + + mconsole_reply_len(req, "", 0, 0, 0); + + spin_lock_irqsave(&client_lock, flags); + list_del(&entry.list); + spin_unlock_irqrestore(&client_lock, flags); +} + +#ifdef CONFIG_MAGIC_SYSRQ + +#include <linux/sysrq.h> + +static void sysrq_proc(void *arg) +{ + char *op = arg; + handle_sysrq(*op, NULL); +} + +void mconsole_sysrq(struct mc_request *req) +{ + char *ptr = req->request.data; + + ptr += strlen("sysrq"); + while (isspace(*ptr)) ptr++; + + /* + * With 'b', the system will shut down without a chance to reply, + * so in this case, we reply first. + */ + if (*ptr == 'b') + mconsole_reply(req, "", 0, 0); + + with_console(req, sysrq_proc, ptr); +} +#else +void mconsole_sysrq(struct mc_request *req) +{ + mconsole_reply(req, "Sysrq not compiled in", 1, 0); +} +#endif + +static void stack_proc(void *arg) +{ + struct task_struct *from = current, *to = arg; + + to->thread.saved_task = from; + switch_to(from, to, from); +} + +/* + * Mconsole stack trace + * Added by Allan Graves, Jeff Dike + * Dumps a stacks registers to the linux console. + * Usage stack <pid>. + */ +void mconsole_stack(struct mc_request *req) +{ + char *ptr = req->request.data; + int pid_requested= -1; + struct task_struct *to = NULL; + + /* + * Would be nice: + * 1) Send showregs output to mconsole. + * 2) Add a way to stack dump all pids. 
+ */ + + ptr += strlen("stack"); + while (isspace(*ptr)) + ptr++; + + /* + * Should really check for multiple pids or reject bad args here + */ + /* What do the arguments in mconsole_reply mean? */ + if (sscanf(ptr, "%d", &pid_requested) == 0) { + mconsole_reply(req, "Please specify a pid", 1, 0); + return; + } + + to = find_task_by_pid_ns(pid_requested, &init_pid_ns); + if ((to == NULL) || (pid_requested == 0)) { + mconsole_reply(req, "Couldn't find that pid", 1, 0); + return; + } + with_console(req, stack_proc, to); +} + +/* + * Changed by mconsole_setup, which is __setup, and called before SMP is + * active. + */ +static char *notify_socket = NULL; + +static int __init mconsole_init(void) +{ + /* long to avoid size mismatch warnings from gcc */ + long sock; + int err; + char file[UNIX_PATH_MAX]; + + if (umid_file_name("mconsole", file, sizeof(file))) + return -1; + snprintf(mconsole_socket_name, sizeof(file), "%s", file); + + sock = os_create_unix_socket(file, sizeof(file), 1); + if (sock < 0) { + printk(KERN_ERR "Failed to initialize management console\n"); + return 1; + } + if (os_set_fd_block(sock, 0)) + goto out; + + register_reboot_notifier(&reboot_notifier); + + err = um_request_irq(MCONSOLE_IRQ, sock, IRQ_READ, mconsole_interrupt, + IRQF_DISABLED | IRQF_SHARED | IRQF_SAMPLE_RANDOM, + "mconsole", (void *)sock); + if (err) { + printk(KERN_ERR "Failed to get IRQ for management console\n"); + goto out; + } + + if (notify_socket != NULL) { + notify_socket = kstrdup(notify_socket, GFP_KERNEL); + if (notify_socket != NULL) + mconsole_notify(notify_socket, MCONSOLE_SOCKET, + mconsole_socket_name, + strlen(mconsole_socket_name) + 1); + else printk(KERN_ERR "mconsole_setup failed to strdup " + "string\n"); + } + + printk(KERN_INFO "mconsole (version %d) initialized on %s\n", + MCONSOLE_VERSION, mconsole_socket_name); + return 0; + + out: + os_close_file(sock); + return 1; +} + +__initcall(mconsole_init); + +static int write_proc_mconsole(struct file *file, const 
char __user *buffer, + unsigned long count, void *data) +{ + char *buf; + + buf = kmalloc(count + 1, GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + + if (copy_from_user(buf, buffer, count)) { + count = -EFAULT; + goto out; + } + + buf[count] = '\0'; + + mconsole_notify(notify_socket, MCONSOLE_USER_NOTIFY, buf, count); + out: + kfree(buf); + return count; +} + +static int create_proc_mconsole(void) +{ + struct proc_dir_entry *ent; + + if (notify_socket == NULL) + return 0; + + ent = create_proc_entry("mconsole", S_IFREG | 0200, NULL); + if (ent == NULL) { + printk(KERN_INFO "create_proc_mconsole : create_proc_entry " + "failed\n"); + return 0; + } + + ent->read_proc = NULL; + ent->write_proc = write_proc_mconsole; + return 0; +} + +static DEFINE_SPINLOCK(notify_spinlock); + +void lock_notify(void) +{ + spin_lock(&notify_spinlock); +} + +void unlock_notify(void) +{ + spin_unlock(&notify_spinlock); +} + +__initcall(create_proc_mconsole); + +#define NOTIFY "notify:" + +static int mconsole_setup(char *str) +{ + if (!strncmp(str, NOTIFY, strlen(NOTIFY))) { + str += strlen(NOTIFY); + notify_socket = str; + } + else printk(KERN_ERR "mconsole_setup : Unknown option - '%s'\n", str); + return 1; +} + +__setup("mconsole=", mconsole_setup); + +__uml_help(mconsole_setup, +"mconsole=notify:<socket>\n" +" Requests that the mconsole driver send a message to the named Unix\n" +" socket containing the name of the mconsole socket. 
This also serves\n" +" to notify outside processes when UML has booted far enough to respond\n" +" to mconsole requests.\n\n" +); + +static int notify_panic(struct notifier_block *self, unsigned long unused1, + void *ptr) +{ + char *message = ptr; + + if (notify_socket == NULL) + return 0; + + mconsole_notify(notify_socket, MCONSOLE_PANIC, message, + strlen(message) + 1); + return 0; +} + +static struct notifier_block panic_exit_notifier = { + .notifier_call = notify_panic, + .next = NULL, + .priority = 1 +}; + +static int add_notifier(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &panic_exit_notifier); + return 0; +} + +__initcall(add_notifier); + +char *mconsole_notify_socket(void) +{ + return notify_socket; +} + +EXPORT_SYMBOL(mconsole_notify_socket); diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c new file mode 100644 index 0000000..f8cf4c8 --- /dev/null +++ b/arch/um/drivers/mconsole_user.c @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <sys/socket.h> +#include <sys/uio.h> +#include <sys/un.h> +#include "kern_constants.h" +#include "mconsole.h" +#include "user.h" + +static struct mconsole_command commands[] = { + /* + * With uts namespaces, uts information becomes process-specific, so + * we need a process context. If we try handling this in interrupt + * context, we may hit an exiting process without a valid uts + * namespace. 
+ */ + { "version", mconsole_version, MCONSOLE_PROC }, + { "halt", mconsole_halt, MCONSOLE_PROC }, + { "reboot", mconsole_reboot, MCONSOLE_PROC }, + { "config", mconsole_config, MCONSOLE_PROC }, + { "remove", mconsole_remove, MCONSOLE_PROC }, + { "sysrq", mconsole_sysrq, MCONSOLE_INTR }, + { "help", mconsole_help, MCONSOLE_INTR }, + { "cad", mconsole_cad, MCONSOLE_INTR }, + { "stop", mconsole_stop, MCONSOLE_PROC }, + { "go", mconsole_go, MCONSOLE_INTR }, + { "log", mconsole_log, MCONSOLE_INTR }, + { "proc", mconsole_proc, MCONSOLE_PROC }, + { "stack", mconsole_stack, MCONSOLE_INTR }, +}; + +/* Initialized in mconsole_init, which is an initcall */ +char mconsole_socket_name[256]; + +static int mconsole_reply_v0(struct mc_request *req, char *reply) +{ + struct iovec iov; + struct msghdr msg; + + iov.iov_base = reply; + iov.iov_len = strlen(reply); + + msg.msg_name = &(req->origin); + msg.msg_namelen = req->originlen; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + return sendmsg(req->originating_fd, &msg, 0); +} + +static struct mconsole_command *mconsole_parse(struct mc_request *req) +{ + struct mconsole_command *cmd; + int i; + + for (i = 0; i < ARRAY_SIZE(commands); i++) { + cmd = &commands[i]; + if (!strncmp(req->request.data, cmd->command, + strlen(cmd->command))) { + return cmd; + } + } + return NULL; +} + +#define MIN(a,b) ((a)<(b) ? 
(a):(b)) + +#define STRINGX(x) #x +#define STRING(x) STRINGX(x) + +int mconsole_get_request(int fd, struct mc_request *req) +{ + int len; + + req->originlen = sizeof(req->origin); + req->len = recvfrom(fd, &req->request, sizeof(req->request), 0, + (struct sockaddr *) req->origin, &req->originlen); + if (req->len < 0) + return 0; + + req->originating_fd = fd; + + if (req->request.magic != MCONSOLE_MAGIC) { + /* Unversioned request */ + len = MIN(sizeof(req->request.data) - 1, + strlen((char *) &req->request)); + memmove(req->request.data, &req->request, len); + req->request.data[len] = '\0'; + + req->request.magic = MCONSOLE_MAGIC; + req->request.version = 0; + req->request.len = len; + + mconsole_reply_v0(req, "ERR Version 0 mconsole clients are " + "not supported by this driver"); + return 0; + } + + if (req->request.len >= MCONSOLE_MAX_DATA) { + mconsole_reply(req, "Request too large", 1, 0); + return 0; + } + if (req->request.version != MCONSOLE_VERSION) { + mconsole_reply(req, "This driver only supports version " + STRING(MCONSOLE_VERSION) " clients", 1, 0); + } + + req->request.data[req->request.len] = '\0'; + req->cmd = mconsole_parse(req); + if (req->cmd == NULL) { + mconsole_reply(req, "Unknown command", 1, 0); + return 0; + } + + return 1; +} + +int mconsole_reply_len(struct mc_request *req, const char *str, int total, + int err, int more) +{ + /* + * XXX This is a stack consumption problem. It'd be nice to + * make it global and serialize access to it, but there are a + * ton of callers to this function. 
+ */ + struct mconsole_reply reply; + int len, n; + + do { + reply.err = err; + + /* err can only be true on the first packet */ + err = 0; + + len = MIN(total, MCONSOLE_MAX_DATA - 1); + + if (len == total) reply.more = more; + else reply.more = 1; + + memcpy(reply.data, str, len); + reply.data[len] = '\0'; + total -= len; + str += len; + reply.len = len + 1; + + len = sizeof(reply) + reply.len - sizeof(reply.data); + + n = sendto(req->originating_fd, &reply, len, 0, + (struct sockaddr *) req->origin, req->originlen); + + if (n < 0) + return -errno; + } while (total > 0); + return 0; +} + +int mconsole_reply(struct mc_request *req, const char *str, int err, int more) +{ + return mconsole_reply_len(req, str, strlen(str), err, more); +} + + +int mconsole_unlink_socket(void) +{ + unlink(mconsole_socket_name); + return 0; +} + +static int notify_sock = -1; + +int mconsole_notify(char *sock_name, int type, const void *data, int len) +{ + struct sockaddr_un target; + struct mconsole_notify packet; + int n, err = 0; + + lock_notify(); + if (notify_sock < 0) { + notify_sock = socket(PF_UNIX, SOCK_DGRAM, 0); + if (notify_sock < 0) { + err = -errno; + printk(UM_KERN_ERR "mconsole_notify - socket failed, " + "errno = %d\n", errno); + } + } + unlock_notify(); + + if (err) + return err; + + target.sun_family = AF_UNIX; + strcpy(target.sun_path, sock_name); + + packet.magic = MCONSOLE_MAGIC; + packet.version = MCONSOLE_VERSION; + packet.type = type; + len = (len > sizeof(packet.data)) ? 
sizeof(packet.data) : len; + packet.len = len; + memcpy(packet.data, data, len); + + err = 0; + len = sizeof(packet) + packet.len - sizeof(packet.data); + n = sendto(notify_sock, &packet, len, 0, (struct sockaddr *) &target, + sizeof(target)); + if (n < 0) { + err = -errno; + printk(UM_KERN_ERR "mconsole_notify - sendto failed, " + "errno = %d\n", errno); + } + return err; +} diff --git a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c new file mode 100644 index 0000000..eb24032 --- /dev/null +++ b/arch/um/drivers/mmapper_kern.c @@ -0,0 +1,142 @@ +/* + * arch/um/drivers/mmapper_kern.c + * + * BRIEF MODULE DESCRIPTION + * + * Copyright (C) 2000 RidgeRun, Inc. + * Author: RidgeRun, Inc. + * Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com + * + */ + +#include <linux/stddef.h> +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/smp_lock.h> +#include <asm/uaccess.h> +#include "mem_user.h" + +/* These are set in mmapper_init, which is called at boot time */ +static unsigned long mmapper_size; +static unsigned long p_buf; +static char *v_buf; + +static ssize_t mmapper_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + return simple_read_from_buffer(buf, count, ppos, v_buf, mmapper_size); +} + +static ssize_t mmapper_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + if (*ppos > mmapper_size) + return -EINVAL; + + if (count > mmapper_size - *ppos) + count = mmapper_size - *ppos; + + if (copy_from_user(&v_buf[*ppos], buf, count)) + return -EFAULT; + + return count; +} + +static int mmapper_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + return -ENOIOCTLCMD; +} + +static int mmapper_mmap(struct file *file, struct vm_area_struct *vma) +{ + int ret = -EINVAL; + int size; + + if (vma->vm_pgoff != 0) + goto out; + + size = vma->vm_end - 
vma->vm_start; + if (size > mmapper_size) + return -EFAULT; + + /* + * XXX A comment above remap_pfn_range says it should only be + * called when the mm semaphore is held + */ + if (remap_pfn_range(vma, vma->vm_start, p_buf >> PAGE_SHIFT, size, + vma->vm_page_prot)) + goto out; + ret = 0; +out: + return ret; +} + +static int mmapper_open(struct inode *inode, struct file *file) +{ + cycle_kernel_lock(); + return 0; +} + +static int mmapper_release(struct inode *inode, struct file *file) +{ + return 0; +} + +static const struct file_operations mmapper_fops = { + .owner = THIS_MODULE, + .read = mmapper_read, + .write = mmapper_write, + .ioctl = mmapper_ioctl, + .mmap = mmapper_mmap, + .open = mmapper_open, + .release = mmapper_release, +}; + +/* + * No locking needed - only used (and modified) by below initcall and exitcall. + */ +static struct miscdevice mmapper_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "mmapper", + .fops = &mmapper_fops +}; + +static int __init mmapper_init(void) +{ + int err; + + printk(KERN_INFO "Mapper v0.1\n"); + + v_buf = (char *) find_iomem("mmapper", &mmapper_size); + if (mmapper_size == 0) { + printk(KERN_ERR "mmapper_init - find_iomem failed\n"); + goto out; + } + + err = misc_register(&mmapper_dev); + if (err) { + printk(KERN_ERR "mmapper - misc_register failed, err = %d\n", + err); + goto out; + } + + p_buf = __pa(v_buf); +out: + return 0; +} + +static void mmapper_exit(void) +{ + misc_deregister(&mmapper_dev); +} + +module_init(mmapper_init); +module_exit(mmapper_exit); + +MODULE_AUTHOR("Greg Lonnon <glonnon@ridgerun.com>"); +MODULE_DESCRIPTION("DSPLinux simulator mmapper driver"); diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c new file mode 100644 index 0000000..5b4ca8d --- /dev/null +++ b/arch/um/drivers/net_kern.c @@ -0,0 +1,924 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). 
+ * Copyright (C) 2001 by various other people who didn't put their name here. + * Licensed under the GPL. + */ + +#include <linux/bootmem.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/inetdevice.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/platform_device.h> +#include <linux/rtnetlink.h> +#include <linux/skbuff.h> +#include <linux/spinlock.h> +#include "init.h" +#include "irq_kern.h" +#include "irq_user.h" +#include "mconsole_kern.h" +#include "net_kern.h" +#include "net_user.h" + +static inline void set_ether_mac(struct net_device *dev, unsigned char *addr) +{ + memcpy(dev->dev_addr, addr, ETH_ALEN); +} + +#define DRIVER_NAME "uml-netdev" + +static DEFINE_SPINLOCK(opened_lock); +static LIST_HEAD(opened); + +/* + * The drop_skb is used when we can't allocate an skb. The + * packet is read into drop_skb in order to get the data off the + * connection to the host. + * It is reallocated whenever a maximum packet size is seen which is + * larger than any seen before. update_drop_skb is called from + * eth_configure when a new interface is added. + */ +static DEFINE_SPINLOCK(drop_lock); +static struct sk_buff *drop_skb; +static int drop_max; + +static int update_drop_skb(int max) +{ + struct sk_buff *new; + unsigned long flags; + int err = 0; + + spin_lock_irqsave(&drop_lock, flags); + + if (max <= drop_max) + goto out; + + err = -ENOMEM; + new = dev_alloc_skb(max); + if (new == NULL) + goto out; + + skb_put(new, max); + + kfree_skb(drop_skb); + drop_skb = new; + drop_max = max; + err = 0; +out: + spin_unlock_irqrestore(&drop_lock, flags); + + return err; +} + +static int uml_net_rx(struct net_device *dev) +{ + struct uml_net_private *lp = dev->priv; + int pkt_len; + struct sk_buff *skb; + + /* If we can't allocate memory, try again next round. 
*/ + skb = dev_alloc_skb(lp->max_packet); + if (skb == NULL) { + drop_skb->dev = dev; + /* Read a packet into drop_skb and don't do anything with it. */ + (*lp->read)(lp->fd, drop_skb, lp); + lp->stats.rx_dropped++; + return 0; + } + + skb->dev = dev; + skb_put(skb, lp->max_packet); + skb_reset_mac_header(skb); + pkt_len = (*lp->read)(lp->fd, skb, lp); + + if (pkt_len > 0) { + skb_trim(skb, pkt_len); + skb->protocol = (*lp->protocol)(skb); + + lp->stats.rx_bytes += skb->len; + lp->stats.rx_packets++; + netif_rx(skb); + return pkt_len; + } + + kfree_skb(skb); + return pkt_len; +} + +static void uml_dev_close(struct work_struct *work) +{ + struct uml_net_private *lp = + container_of(work, struct uml_net_private, work); + dev_close(lp->dev); +} + +static irqreturn_t uml_net_interrupt(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct uml_net_private *lp = dev->priv; + int err; + + if (!netif_running(dev)) + return IRQ_NONE; + + spin_lock(&lp->lock); + while ((err = uml_net_rx(dev)) > 0) ; + if (err < 0) { + printk(KERN_ERR + "Device '%s' read returned %d, shutting it down\n", + dev->name, err); + /* dev_close can't be called in interrupt context, and takes + * again lp->lock. + * And dev_close() can be safely called multiple times on the + * same device, since it tests for (dev->flags & IFF_UP). So + * there's no harm in delaying the device shutdown. + * Furthermore, the workqueue will not re-enqueue an already + * enqueued work item. 
*/ + schedule_work(&lp->work); + goto out; + } + reactivate_fd(lp->fd, UM_ETH_IRQ); + +out: + spin_unlock(&lp->lock); + return IRQ_HANDLED; +} + +static int uml_net_open(struct net_device *dev) +{ + struct uml_net_private *lp = dev->priv; + int err; + + if (lp->fd >= 0) { + err = -ENXIO; + goto out; + } + + lp->fd = (*lp->open)(&lp->user); + if (lp->fd < 0) { + err = lp->fd; + goto out; + } + + err = um_request_irq(dev->irq, lp->fd, IRQ_READ, uml_net_interrupt, + IRQF_DISABLED | IRQF_SHARED, dev->name, dev); + if (err != 0) { + printk(KERN_ERR "uml_net_open: failed to get irq(%d)\n", err); + err = -ENETUNREACH; + goto out_close; + } + + lp->tl.data = (unsigned long) &lp->user; + netif_start_queue(dev); + + /* clear buffer - it can happen that the host side of the interface + * is full when we get here. In this case, new data is never queued, + * SIGIOs never arrive, and the net never works. + */ + while ((err = uml_net_rx(dev)) > 0) ; + + spin_lock(&opened_lock); + list_add(&lp->list, &opened); + spin_unlock(&opened_lock); + + return 0; +out_close: + if (lp->close != NULL) (*lp->close)(lp->fd, &lp->user); + lp->fd = -1; +out: + return err; +} + +static int uml_net_close(struct net_device *dev) +{ + struct uml_net_private *lp = dev->priv; + + netif_stop_queue(dev); + + free_irq(dev->irq, dev); + if (lp->close != NULL) + (*lp->close)(lp->fd, &lp->user); + lp->fd = -1; + + spin_lock(&opened_lock); + list_del(&lp->list); + spin_unlock(&opened_lock); + + return 0; +} + +static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct uml_net_private *lp = dev->priv; + unsigned long flags; + int len; + + netif_stop_queue(dev); + + spin_lock_irqsave(&lp->lock, flags); + + len = (*lp->write)(lp->fd, skb, lp); + + if (len == skb->len) { + lp->stats.tx_packets++; + lp->stats.tx_bytes += skb->len; + dev->trans_start = jiffies; + netif_start_queue(dev); + + /* this is normally done in the interrupt when tx finishes */ + netif_wake_queue(dev); + } + else 
if (len == 0) { + netif_start_queue(dev); + lp->stats.tx_dropped++; + } + else { + netif_start_queue(dev); + printk(KERN_ERR "uml_net_start_xmit: failed(%d)\n", len); + } + + spin_unlock_irqrestore(&lp->lock, flags); + + dev_kfree_skb(skb); + + return 0; +} + +static struct net_device_stats *uml_net_get_stats(struct net_device *dev) +{ + struct uml_net_private *lp = dev->priv; + return &lp->stats; +} + +static void uml_net_set_multicast_list(struct net_device *dev) +{ + return; +} + +static void uml_net_tx_timeout(struct net_device *dev) +{ + dev->trans_start = jiffies; + netif_wake_queue(dev); +} + +static int uml_net_set_mac(struct net_device *dev, void *addr) +{ + struct uml_net_private *lp = dev->priv; + struct sockaddr *hwaddr = addr; + + spin_lock_irq(&lp->lock); + set_ether_mac(dev, hwaddr->sa_data); + spin_unlock_irq(&lp->lock); + + return 0; +} + +static int uml_net_change_mtu(struct net_device *dev, int new_mtu) +{ + dev->mtu = new_mtu; + + return 0; +} + +static void uml_net_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strcpy(info->driver, DRIVER_NAME); + strcpy(info->version, "42"); +} + +static struct ethtool_ops uml_net_ethtool_ops = { + .get_drvinfo = uml_net_get_drvinfo, + .get_link = ethtool_op_get_link, +}; + +static void uml_net_user_timer_expire(unsigned long _conn) +{ +#ifdef undef + struct connection *conn = (struct connection *)_conn; + + dprintk(KERN_INFO "uml_net_user_timer_expire [%p]\n", conn); + do_connect(conn); +#endif +} + +static void setup_etheraddr(char *str, unsigned char *addr, char *name) +{ + char *end; + int i; + + if (str == NULL) + goto random; + + for (i = 0; i < 6; i++) { + addr[i] = simple_strtoul(str, &end, 16); + if ((end == str) || + ((*end != ':') && (*end != ',') && (*end != '\0'))) { + printk(KERN_ERR + "setup_etheraddr: failed to parse '%s' " + "as an ethernet address\n", str); + goto random; + } + str = end + 1; + } + if (is_multicast_ether_addr(addr)) { + printk(KERN_ERR + "Attempt to 
assign a multicast ethernet address to a " + "device disallowed\n"); + goto random; + } + if (!is_valid_ether_addr(addr)) { + printk(KERN_ERR + "Attempt to assign an invalid ethernet address to a " + "device disallowed\n"); + goto random; + } + if (!is_local_ether_addr(addr)) { + printk(KERN_WARNING + "Warning: Assigning a globally valid ethernet " + "address to a device\n"); + printk(KERN_WARNING "You should set the 2nd rightmost bit in " + "the first byte of the MAC,\n"); + printk(KERN_WARNING "i.e. %02x:%02x:%02x:%02x:%02x:%02x\n", + addr[0] | 0x02, addr[1], addr[2], addr[3], addr[4], + addr[5]); + } + return; + +random: + printk(KERN_INFO + "Choosing a random ethernet address for device %s\n", name); + random_ether_addr(addr); +} + +static DEFINE_SPINLOCK(devices_lock); +static LIST_HEAD(devices); + +static struct platform_driver uml_net_driver = { + .driver = { + .name = DRIVER_NAME, + }, +}; + +static void net_device_release(struct device *dev) +{ + struct uml_net *device = dev->driver_data; + struct net_device *netdev = device->dev; + struct uml_net_private *lp = netdev->priv; + + if (lp->remove != NULL) + (*lp->remove)(&lp->user); + list_del(&device->list); + kfree(device); + free_netdev(netdev); +} + +/* + * Ensures that platform_driver_register is called only once by + * eth_configure. Will be set in an initcall. 
+ */ +static int driver_registered; + +static void eth_configure(int n, void *init, char *mac, + struct transport *transport) +{ + struct uml_net *device; + struct net_device *dev; + struct uml_net_private *lp; + int err, size; + + size = transport->private_size + sizeof(struct uml_net_private); + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (device == NULL) { + printk(KERN_ERR "eth_configure failed to allocate struct " + "uml_net\n"); + return; + } + + dev = alloc_etherdev(size); + if (dev == NULL) { + printk(KERN_ERR "eth_configure: failed to allocate struct " + "net_device for eth%d\n", n); + goto out_free_device; + } + + INIT_LIST_HEAD(&device->list); + device->index = n; + + /* If this name ends up conflicting with an existing registered + * netdevice, that is OK, register_netdev{,ice}() will notice this + * and fail. + */ + snprintf(dev->name, sizeof(dev->name), "eth%d", n); + + setup_etheraddr(mac, device->mac, dev->name); + + printk(KERN_INFO "Netdevice %d ", n); + printk("(%02x:%02x:%02x:%02x:%02x:%02x) ", + device->mac[0], device->mac[1], + device->mac[2], device->mac[3], + device->mac[4], device->mac[5]); + printk(": "); + + lp = dev->priv; + /* This points to the transport private data. It's still clear, but we + * must memset it to 0 *now*. Let's help the drivers. */ + memset(lp, 0, size); + INIT_WORK(&lp->work, uml_dev_close); + + /* sysfs register */ + if (!driver_registered) { + platform_driver_register(&uml_net_driver); + driver_registered = 1; + } + device->pdev.id = n; + device->pdev.name = DRIVER_NAME; + device->pdev.dev.release = net_device_release; + device->pdev.dev.driver_data = device; + if (platform_device_register(&device->pdev)) + goto out_free_netdev; + SET_NETDEV_DEV(dev,&device->pdev.dev); + + device->dev = dev; + + /* + * These just fill in a data structure, so there's no failure + * to be worried about. 
+ */ + (*transport->kern->init)(dev, init); + + *lp = ((struct uml_net_private) + { .list = LIST_HEAD_INIT(lp->list), + .dev = dev, + .fd = -1, + .mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0}, + .max_packet = transport->user->max_packet, + .protocol = transport->kern->protocol, + .open = transport->user->open, + .close = transport->user->close, + .remove = transport->user->remove, + .read = transport->kern->read, + .write = transport->kern->write, + .add_address = transport->user->add_address, + .delete_address = transport->user->delete_address }); + + init_timer(&lp->tl); + spin_lock_init(&lp->lock); + lp->tl.function = uml_net_user_timer_expire; + memcpy(lp->mac, device->mac, sizeof(lp->mac)); + + if ((transport->user->init != NULL) && + ((*transport->user->init)(&lp->user, dev) != 0)) + goto out_unregister; + + set_ether_mac(dev, device->mac); + dev->mtu = transport->user->mtu; + dev->open = uml_net_open; + dev->hard_start_xmit = uml_net_start_xmit; + dev->stop = uml_net_close; + dev->get_stats = uml_net_get_stats; + dev->set_multicast_list = uml_net_set_multicast_list; + dev->tx_timeout = uml_net_tx_timeout; + dev->set_mac_address = uml_net_set_mac; + dev->change_mtu = uml_net_change_mtu; + dev->ethtool_ops = &uml_net_ethtool_ops; + dev->watchdog_timeo = (HZ >> 1); + dev->irq = UM_ETH_IRQ; + + err = update_drop_skb(lp->max_packet); + if (err) + goto out_undo_user_init; + + rtnl_lock(); + err = register_netdevice(dev); + rtnl_unlock(); + if (err) + goto out_undo_user_init; + + spin_lock(&devices_lock); + list_add(&device->list, &devices); + spin_unlock(&devices_lock); + + return; + +out_undo_user_init: + if (transport->user->remove != NULL) + (*transport->user->remove)(&lp->user); +out_unregister: + platform_device_unregister(&device->pdev); + return; /* platform_device_unregister frees dev and device */ +out_free_netdev: + free_netdev(dev); +out_free_device: + kfree(device); +} + +static struct uml_net *find_device(int n) +{ + struct uml_net *device; + struct list_head 
*ele; + + spin_lock(&devices_lock); + list_for_each(ele, &devices) { + device = list_entry(ele, struct uml_net, list); + if (device->index == n) + goto out; + } + device = NULL; + out: + spin_unlock(&devices_lock); + return device; +} + +static int eth_parse(char *str, int *index_out, char **str_out, + char **error_out) +{ + char *end; + int n, err = -EINVAL; + + n = simple_strtoul(str, &end, 0); + if (end == str) { + *error_out = "Bad device number"; + return err; + } + + str = end; + if (*str != '=') { + *error_out = "Expected '=' after device number"; + return err; + } + + str++; + if (find_device(n)) { + *error_out = "Device already configured"; + return err; + } + + *index_out = n; + *str_out = str; + return 0; +} + +struct eth_init { + struct list_head list; + char *init; + int index; +}; + +static DEFINE_SPINLOCK(transports_lock); +static LIST_HEAD(transports); + +/* Filled in during early boot */ +static LIST_HEAD(eth_cmd_line); + +static int check_transport(struct transport *transport, char *eth, int n, + void **init_out, char **mac_out) +{ + int len; + + len = strlen(transport->name); + if (strncmp(eth, transport->name, len)) + return 0; + + eth += len; + if (*eth == ',') + eth++; + else if (*eth != '\0') + return 0; + + *init_out = kmalloc(transport->setup_size, GFP_KERNEL); + if (*init_out == NULL) + return 1; + + if (!transport->setup(eth, mac_out, *init_out)) { + kfree(*init_out); + *init_out = NULL; + } + return 1; +} + +void register_transport(struct transport *new) +{ + struct list_head *ele, *next; + struct eth_init *eth; + void *init; + char *mac = NULL; + int match; + + spin_lock(&transports_lock); + BUG_ON(!list_empty(&new->list)); + list_add(&new->list, &transports); + spin_unlock(&transports_lock); + + list_for_each_safe(ele, next, &eth_cmd_line) { + eth = list_entry(ele, struct eth_init, list); + match = check_transport(new, eth->init, eth->index, &init, + &mac); + if (!match) + continue; + else if (init != NULL) { + eth_configure(eth->index, 
init, mac, new); + kfree(init); + } + list_del(&eth->list); + } +} + +static int eth_setup_common(char *str, int index) +{ + struct list_head *ele; + struct transport *transport; + void *init; + char *mac = NULL; + int found = 0; + + spin_lock(&transports_lock); + list_for_each(ele, &transports) { + transport = list_entry(ele, struct transport, list); + if (!check_transport(transport, str, index, &init, &mac)) + continue; + if (init != NULL) { + eth_configure(index, init, mac, transport); + kfree(init); + } + found = 1; + break; + } + + spin_unlock(&transports_lock); + return found; +} + +static int __init eth_setup(char *str) +{ + struct eth_init *new; + char *error; + int n, err; + + err = eth_parse(str, &n, &str, &error); + if (err) { + printk(KERN_ERR "eth_setup - Couldn't parse '%s' : %s\n", + str, error); + return 1; + } + + new = alloc_bootmem(sizeof(*new)); + if (new == NULL) { + printk(KERN_ERR "eth_init : alloc_bootmem failed\n"); + return 1; + } + + INIT_LIST_HEAD(&new->list); + new->index = n; + new->init = str; + + list_add_tail(&new->list, &eth_cmd_line); + return 1; +} + +__setup("eth", eth_setup); +__uml_help(eth_setup, +"eth[0-9]+=<transport>,<options>\n" +" Configure a network device.\n\n" +); + +static int net_config(char *str, char **error_out) +{ + int n, err; + + err = eth_parse(str, &n, &str, error_out); + if (err) + return err; + + /* This string is broken up and the pieces used by the underlying + * driver. So, it is freed only if eth_setup_common fails. 
+ */ + str = kstrdup(str, GFP_KERNEL); + if (str == NULL) { + *error_out = "net_config failed to strdup string"; + return -ENOMEM; + } + err = !eth_setup_common(str, n); + if (err) + kfree(str); + return err; +} + +static int net_id(char **str, int *start_out, int *end_out) +{ + char *end; + int n; + + n = simple_strtoul(*str, &end, 0); + if ((*end != '\0') || (end == *str)) + return -1; + + *start_out = n; + *end_out = n; + *str = end; + return n; +} + +static int net_remove(int n, char **error_out) +{ + struct uml_net *device; + struct net_device *dev; + struct uml_net_private *lp; + + device = find_device(n); + if (device == NULL) + return -ENODEV; + + dev = device->dev; + lp = dev->priv; + if (lp->fd > 0) + return -EBUSY; + unregister_netdev(dev); + platform_device_unregister(&device->pdev); + + return 0; +} + +static struct mc_device net_mc = { + .list = LIST_HEAD_INIT(net_mc.list), + .name = "eth", + .config = net_config, + .get_config = NULL, + .id = net_id, + .remove = net_remove, +}; + +#ifdef CONFIG_INET +static int uml_inetaddr_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct in_ifaddr *ifa = ptr; + struct net_device *dev = ifa->ifa_dev->dev; + struct uml_net_private *lp; + void (*proc)(unsigned char *, unsigned char *, void *); + unsigned char addr_buf[4], netmask_buf[4]; + + if (dev->open != uml_net_open) + return NOTIFY_DONE; + + lp = dev->priv; + + proc = NULL; + switch (event) { + case NETDEV_UP: + proc = lp->add_address; + break; + case NETDEV_DOWN: + proc = lp->delete_address; + break; + } + if (proc != NULL) { + memcpy(addr_buf, &ifa->ifa_address, sizeof(addr_buf)); + memcpy(netmask_buf, &ifa->ifa_mask, sizeof(netmask_buf)); + (*proc)(addr_buf, netmask_buf, &lp->user); + } + return NOTIFY_DONE; +} + +/* uml_net_init shouldn't be called twice on two CPUs at the same time */ +static struct notifier_block uml_inetaddr_notifier = { + .notifier_call = uml_inetaddr_event, +}; + +static void inet_register(void) +{ + 
struct list_head *ele; + struct uml_net_private *lp; + struct in_device *ip; + struct in_ifaddr *in; + + register_inetaddr_notifier(&uml_inetaddr_notifier); + + /* Devices may have been opened already, so the uml_inetaddr_notifier + * didn't get a chance to run for them. This fakes it so that + * addresses which have already been set up get handled properly. + */ + spin_lock(&opened_lock); + list_for_each(ele, &opened) { + lp = list_entry(ele, struct uml_net_private, list); + ip = lp->dev->ip_ptr; + if (ip == NULL) + continue; + in = ip->ifa_list; + while (in != NULL) { + uml_inetaddr_event(NULL, NETDEV_UP, in); + in = in->ifa_next; + } + } + spin_unlock(&opened_lock); +} +#else +static inline void inet_register(void) +{ +} +#endif + +static int uml_net_init(void) +{ + mconsole_register_dev(&net_mc); + inet_register(); + return 0; +} + +__initcall(uml_net_init); + +static void close_devices(void) +{ + struct list_head *ele; + struct uml_net_private *lp; + + spin_lock(&opened_lock); + list_for_each(ele, &opened) { + lp = list_entry(ele, struct uml_net_private, list); + free_irq(lp->dev->irq, lp->dev); + if ((lp->close != NULL) && (lp->fd >= 0)) + (*lp->close)(lp->fd, &lp->user); + if (lp->remove != NULL) + (*lp->remove)(&lp->user); + } + spin_unlock(&opened_lock); +} + +__uml_exitcall(close_devices); + +void iter_addresses(void *d, void (*cb)(unsigned char *, unsigned char *, + void *), + void *arg) +{ + struct net_device *dev = d; + struct in_device *ip = dev->ip_ptr; + struct in_ifaddr *in; + unsigned char address[4], netmask[4]; + + if (ip == NULL) return; + in = ip->ifa_list; + while (in != NULL) { + memcpy(address, &in->ifa_address, sizeof(address)); + memcpy(netmask, &in->ifa_mask, sizeof(netmask)); + (*cb)(address, netmask, arg); + in = in->ifa_next; + } +} + +int dev_netmask(void *d, void *m) +{ + struct net_device *dev = d; + struct in_device *ip = dev->ip_ptr; + struct in_ifaddr *in; + __be32 *mask_out = m; + + if (ip == NULL) + return 1; + + in = 
ip->ifa_list; + if (in == NULL) + return 1; + + *mask_out = in->ifa_mask; + return 0; +} + +void *get_output_buffer(int *len_out) +{ + void *ret; + + ret = (void *) __get_free_pages(GFP_KERNEL, 0); + if (ret) *len_out = PAGE_SIZE; + else *len_out = 0; + return ret; +} + +void free_output_buffer(void *buffer) +{ + free_pages((unsigned long) buffer, 0); +} + +int tap_setup_common(char *str, char *type, char **dev_name, char **mac_out, + char **gate_addr) +{ + char *remain; + + remain = split_if_spec(str, dev_name, mac_out, gate_addr, NULL); + if (remain != NULL) { + printk(KERN_ERR "tap_setup_common - Extra garbage on " + "specification : '%s'\n", remain); + return 1; + } + + return 0; +} + +unsigned short eth_protocol(struct sk_buff *skb) +{ + return eth_type_trans(skb, skb->dev); +} diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c new file mode 100644 index 0000000..9415dd9 --- /dev/null +++ b/arch/um/drivers/net_user.c @@ -0,0 +1,268 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdarg.h> +#include <errno.h> +#include <stddef.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include "net_user.h" +#include "kern_constants.h" +#include "os.h" +#include "um_malloc.h" +#include "user.h" + +int tap_open_common(void *dev, char *gate_addr) +{ + int tap_addr[4]; + + if (gate_addr == NULL) + return 0; + if (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], + &tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4) { + printk(UM_KERN_ERR "Invalid tap IP address - '%s'\n", + gate_addr); + return -EINVAL; + } + return 0; +} + +void tap_check_ips(char *gate_addr, unsigned char *eth_addr) +{ + int tap_addr[4]; + + if ((gate_addr != NULL) && + (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], + &tap_addr[1], &tap_addr[2], &tap_addr[3]) == 4) && + (eth_addr[0] == tap_addr[0]) && + (eth_addr[1] == tap_addr[1]) && + (eth_addr[2] == 
tap_addr[2]) && + (eth_addr[3] == tap_addr[3])) { + printk(UM_KERN_ERR "The tap IP address and the UML eth IP " + "address must be different\n"); + } +} + +/* Do reliable error handling as this fails frequently enough. */ +void read_output(int fd, char *output, int len) +{ + int remain, ret, expected; + char c; + char *str; + + if (output == NULL) { + output = &c; + len = sizeof(c); + } + + *output = '\0'; + ret = read(fd, &remain, sizeof(remain)); + + if (ret != sizeof(remain)) { + if (ret < 0) + ret = -errno; + expected = sizeof(remain); + str = "length"; + goto err; + } + + while (remain != 0) { + expected = (remain < len) ? remain : len; + ret = read(fd, output, expected); + if (ret != expected) { + if (ret < 0) + ret = -errno; + str = "data"; + goto err; + } + remain -= ret; + } + + return; + +err: + if (ret < 0) + printk(UM_KERN_ERR "read_output - read of %s failed, " + "errno = %d\n", str, -ret); + else + printk(UM_KERN_ERR "read_output - read of %s failed, read only " + "%d of %d bytes\n", str, ret, expected); +} + +int net_read(int fd, void *buf, int len) +{ + int n; + + n = read(fd, buf, len); + + if ((n < 0) && (errno == EAGAIN)) + return 0; + else if (n == 0) + return -ENOTCONN; + return n; +} + +int net_recvfrom(int fd, void *buf, int len) +{ + int n; + + CATCH_EINTR(n = recvfrom(fd, buf, len, 0, NULL, NULL)); + if (n < 0) { + if (errno == EAGAIN) + return 0; + return -errno; + } + else if (n == 0) + return -ENOTCONN; + return n; +} + +int net_write(int fd, void *buf, int len) +{ + int n; + + n = write(fd, buf, len); + + if ((n < 0) && (errno == EAGAIN)) + return 0; + else if (n == 0) + return -ENOTCONN; + return n; +} + +int net_send(int fd, void *buf, int len) +{ + int n; + + CATCH_EINTR(n = send(fd, buf, len, 0)); + if (n < 0) { + if (errno == EAGAIN) + return 0; + return -errno; + } + else if (n == 0) + return -ENOTCONN; + return n; +} + +int net_sendto(int fd, void *buf, int len, void *to, int sock_len) +{ + int n; + + CATCH_EINTR(n = sendto(fd, 
buf, len, 0, (struct sockaddr *) to, + sock_len)); + if (n < 0) { + if (errno == EAGAIN) + return 0; + return -errno; + } + else if (n == 0) + return -ENOTCONN; + return n; +} + +struct change_pre_exec_data { + int close_me; + int stdout; +}; + +static void change_pre_exec(void *arg) +{ + struct change_pre_exec_data *data = arg; + + close(data->close_me); + dup2(data->stdout, 1); +} + +static int change_tramp(char **argv, char *output, int output_len) +{ + int pid, fds[2], err; + struct change_pre_exec_data pe_data; + + err = os_pipe(fds, 1, 0); + if (err < 0) { + printk(UM_KERN_ERR "change_tramp - pipe failed, err = %d\n", + -err); + return err; + } + pe_data.close_me = fds[0]; + pe_data.stdout = fds[1]; + pid = run_helper(change_pre_exec, &pe_data, argv); + + if (pid > 0) /* Avoid hang as we won't get data in failure case. */ + read_output(fds[0], output, output_len); + + close(fds[0]); + close(fds[1]); + + if (pid > 0) + helper_wait(pid); + return pid; +} + +static void change(char *dev, char *what, unsigned char *addr, + unsigned char *netmask) +{ + char addr_buf[sizeof("255.255.255.255\0")]; + char netmask_buf[sizeof("255.255.255.255\0")]; + char version[sizeof("nnnnn\0")]; + char *argv[] = { "uml_net", version, what, dev, addr_buf, + netmask_buf, NULL }; + char *output; + int output_len, pid; + + sprintf(version, "%d", UML_NET_VERSION); + sprintf(addr_buf, "%d.%d.%d.%d", addr[0], addr[1], addr[2], addr[3]); + sprintf(netmask_buf, "%d.%d.%d.%d", netmask[0], netmask[1], + netmask[2], netmask[3]); + + output_len = UM_KERN_PAGE_SIZE; + output = uml_kmalloc(output_len, UM_GFP_KERNEL); + if (output == NULL) + printk(UM_KERN_ERR "change : failed to allocate output " + "buffer\n"); + + pid = change_tramp(argv, output, output_len); + if (pid < 0) return; + + if (output != NULL) { + printk("%s", output); + kfree(output); + } +} + +void open_addr(unsigned char *addr, unsigned char *netmask, void *arg) +{ + change(arg, "add", addr, netmask); +} + +void 
close_addr(unsigned char *addr, unsigned char *netmask, void *arg) +{ + change(arg, "del", addr, netmask); +} + +char *split_if_spec(char *str, ...) +{ + char **arg, *end; + va_list ap; + + va_start(ap, str); + while ((arg = va_arg(ap, char **)) != NULL) { + if (*str == '\0') + return NULL; + end = strchr(str, ','); + if (end != str) + *arg = str; + if (end == NULL) + return NULL; + *end++ = '\0'; + str = end; + } + va_end(ap); + return str; +} diff --git a/arch/um/drivers/null.c b/arch/um/drivers/null.c new file mode 100644 index 0000000..2b45a14 --- /dev/null +++ b/arch/um/drivers/null.c @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) + * Licensed under the GPL + */ + +#include <stddef.h> +#include <errno.h> +#include <fcntl.h> +#include "chan_user.h" +#include "os.h" + +/* This address is used only as a unique identifier */ +static int null_chan; + +static void *null_init(char *str, int device, const struct chan_opts *opts) +{ + return &null_chan; +} + +static int null_open(int input, int output, int primary, void *d, + char **dev_out) +{ + int fd; + + *dev_out = NULL; + + fd = open(DEV_NULL, O_RDWR); + return (fd < 0) ? -errno : fd; +} + +static int null_read(int fd, char *c_out, void *unused) +{ + return -ENODEV; +} + +static void null_free(void *data) +{ +} + +const struct chan_ops null_ops = { + .type = "null", + .init = null_init, + .open = null_open, + .close = generic_close, + .read = null_read, + .write = generic_write, + .console_write = generic_console_write, + .window_size = generic_window_size, + .free = null_free, + .winch = 0, +}; diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c new file mode 100644 index 0000000..3a750dd --- /dev/null +++ b/arch/um/drivers/pcap_kern.c @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL. 
+ */ + +#include "linux/init.h" +#include <linux/netdevice.h> +#include "net_kern.h" +#include "pcap_user.h" + +struct pcap_init { + char *host_if; + int promisc; + int optimize; + char *filter; +}; + +void pcap_init(struct net_device *dev, void *data) +{ + struct uml_net_private *pri; + struct pcap_data *ppri; + struct pcap_init *init = data; + + pri = dev->priv; + ppri = (struct pcap_data *) pri->user; + ppri->host_if = init->host_if; + ppri->promisc = init->promisc; + ppri->optimize = init->optimize; + ppri->filter = init->filter; + + printk("pcap backend, host interface %s\n", ppri->host_if); +} + +static int pcap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return pcap_user_read(fd, skb_mac_header(skb), + skb->dev->mtu + ETH_HEADER_OTHER, + (struct pcap_data *) &lp->user); +} + +static int pcap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return -EPERM; +} + +static const struct net_kern_info pcap_kern_info = { + .init = pcap_init, + .protocol = eth_protocol, + .read = pcap_read, + .write = pcap_write, +}; + +int pcap_setup(char *str, char **mac_out, void *data) +{ + struct pcap_init *init = data; + char *remain, *host_if = NULL, *options[2] = { NULL, NULL }; + int i; + + *init = ((struct pcap_init) + { .host_if = "eth0", + .promisc = 1, + .optimize = 0, + .filter = NULL }); + + remain = split_if_spec(str, &host_if, &init->filter, + &options[0], &options[1], mac_out, NULL); + if (remain != NULL) { + printk(KERN_ERR "pcap_setup - Extra garbage on " + "specification : '%s'\n", remain); + return 0; + } + + if (host_if != NULL) + init->host_if = host_if; + + for (i = 0; i < ARRAY_SIZE(options); i++) { + if (options[i] == NULL) + continue; + if (!strcmp(options[i], "promisc")) + init->promisc = 1; + else if (!strcmp(options[i], "nopromisc")) + init->promisc = 0; + else if (!strcmp(options[i], "optimize")) + init->optimize = 1; + else if (!strcmp(options[i], "nooptimize")) + init->optimize = 0; + else { + printk(KERN_ERR 
"pcap_setup : bad option - '%s'\n", + options[i]); + return 0; + } + } + + return 1; +} + +static struct transport pcap_transport = { + .list = LIST_HEAD_INIT(pcap_transport.list), + .name = "pcap", + .setup = pcap_setup, + .user = &pcap_user_info, + .kern = &pcap_kern_info, + .private_size = sizeof(struct pcap_data), + .setup_size = sizeof(struct pcap_init), +}; + +static int register_pcap(void) +{ + register_transport(&pcap_transport); + return 0; +} + +late_initcall(register_pcap); diff --git a/arch/um/drivers/pcap_user.c b/arch/um/drivers/pcap_user.c new file mode 100644 index 0000000..5f90358 --- /dev/null +++ b/arch/um/drivers/pcap_user.c @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL. + */ + +#include <errno.h> +#include <pcap.h> +#include <string.h> +#include <asm/types.h> +#include "net_user.h" +#include "pcap_user.h" +#include "kern_constants.h" +#include "um_malloc.h" +#include "user.h" + +#define PCAP_FD(p) (*(int *)(p)) + +static int pcap_user_init(void *data, void *dev) +{ + struct pcap_data *pri = data; + pcap_t *p; + char errors[PCAP_ERRBUF_SIZE]; + + p = pcap_open_live(pri->host_if, ETH_MAX_PACKET + ETH_HEADER_OTHER, + pri->promisc, 0, errors); + if (p == NULL) { + printk(UM_KERN_ERR "pcap_user_init : pcap_open_live failed - " + "'%s'\n", errors); + return -EINVAL; + } + + pri->dev = dev; + pri->pcap = p; + return 0; +} + +static int pcap_open(void *data) +{ + struct pcap_data *pri = data; + __u32 netmask; + int err; + + if (pri->pcap == NULL) + return -ENODEV; + + if (pri->filter != NULL) { + err = dev_netmask(pri->dev, &netmask); + if (err < 0) { + printk(UM_KERN_ERR "pcap_open : dev_netmask failed\n"); + return -EIO; + } + + pri->compiled = uml_kmalloc(sizeof(struct bpf_program), + UM_GFP_KERNEL); + if (pri->compiled == NULL) { + printk(UM_KERN_ERR "pcap_open : kmalloc failed\n"); + return -ENOMEM; + } + + err = pcap_compile(pri->pcap, + (struct bpf_program *) 
pri->compiled, + pri->filter, pri->optimize, netmask); + if (err < 0) { + printk(UM_KERN_ERR "pcap_open : pcap_compile failed - " + "'%s'\n", pcap_geterr(pri->pcap)); + goto out; + } + + err = pcap_setfilter(pri->pcap, pri->compiled); + if (err < 0) { + printk(UM_KERN_ERR "pcap_open : pcap_setfilter " + "failed - '%s'\n", pcap_geterr(pri->pcap)); + goto out; + } + } + + return PCAP_FD(pri->pcap); + + out: + kfree(pri->compiled); + return -EIO; +} + +static void pcap_remove(void *data) +{ + struct pcap_data *pri = data; + + if (pri->compiled != NULL) + pcap_freecode(pri->compiled); + + if (pri->pcap != NULL) + pcap_close(pri->pcap); +} + +struct pcap_handler_data { + char *buffer; + int len; +}; + +static void handler(u_char *data, const struct pcap_pkthdr *header, + const u_char *packet) +{ + int len; + + struct pcap_handler_data *hdata = (struct pcap_handler_data *) data; + + len = hdata->len < header->caplen ? hdata->len : header->caplen; + memcpy(hdata->buffer, packet, len); + hdata->len = len; +} + +int pcap_user_read(int fd, void *buffer, int len, struct pcap_data *pri) +{ + struct pcap_handler_data hdata = ((struct pcap_handler_data) + { .buffer = buffer, + .len = len }); + int n; + + n = pcap_dispatch(pri->pcap, 1, handler, (u_char *) &hdata); + if (n < 0) { + printk(UM_KERN_ERR "pcap_dispatch failed - %s\n", + pcap_geterr(pri->pcap)); + return -EIO; + } + else if (n == 0) + return 0; + return hdata.len; +} + +const struct net_user_info pcap_user_info = { + .init = pcap_user_init, + .open = pcap_open, + .close = NULL, + .remove = pcap_remove, + .add_address = NULL, + .delete_address = NULL, + .mtu = ETH_MAX_PACKET, + .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, +}; diff --git a/arch/um/drivers/pcap_user.h b/arch/um/drivers/pcap_user.h new file mode 100644 index 0000000..96b80b5 --- /dev/null +++ b/arch/um/drivers/pcap_user.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "net_user.h" + 
+struct pcap_data { + char *host_if; + int promisc; + int optimize; + char *filter; + void *compiled; + void *pcap; + void *dev; +}; + +extern const struct net_user_info pcap_user_info; + +extern int pcap_user_read(int fd, void *buf, int len, struct pcap_data *pri); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/drivers/port.h b/arch/um/drivers/port.h new file mode 100644 index 0000000..9117609 --- /dev/null +++ b/arch/um/drivers/port.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __PORT_H__ +#define __PORT_H__ + +extern void *port_data(int port); +extern int port_wait(void *data); +extern void port_kern_close(void *d); +extern int port_connection(int fd, int *socket_out, int *pid_out); +extern int port_listen_fd(int port); +extern void port_read(int fd, void *data); +extern void port_kern_free(void *d); +extern int port_rcv_fd(int fd); +extern void port_remove_dev(void *d); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c new file mode 100644 index 0000000..1993008 --- /dev/null +++ b/arch/um/drivers/port_kern.c @@ -0,0 +1,304 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) + * Licensed under the GPL + */ + +#include "linux/completion.h" +#include "linux/interrupt.h" +#include "linux/list.h" +#include "linux/mutex.h" +#include "asm/atomic.h" +#include "init.h" +#include "irq_kern.h" +#include "os.h" +#include "port.h" + +struct port_list { + struct list_head list; + atomic_t wait_count; + int has_connection; + struct completion done; + int port; + int fd; + spinlock_t lock; + struct list_head pending; + struct list_head connections; +}; + +struct port_dev { + struct port_list *port; + int helper_pid; + int telnetd_pid; +}; + +struct connection { + struct list_head list; + int fd; + int helper_pid; + int socket[2]; + int telnetd_pid; + struct port_list *port; +}; + +static irqreturn_t pipe_interrupt(int irq, void *data) +{ + struct connection *conn = data; + int fd; + + fd = os_rcv_fd(conn->socket[0], &conn->helper_pid); + if (fd < 0) { + if (fd == -EAGAIN) + return IRQ_NONE; + + printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n", + -fd); + os_close_file(conn->fd); + } + + list_del(&conn->list); + + conn->fd = fd; + list_add(&conn->list, &conn->port->connections); + + complete(&conn->port->done); + return IRQ_HANDLED; +} + +#define NO_WAITER_MSG \ + "****\n" \ + "There are currently no UML consoles waiting for port connections.\n" \ + "Either disconnect from one to make it available or activate some more\n" \ + "by enabling more consoles in the UML /etc/inittab.\n" \ + "****\n" + +static int port_accept(struct port_list *port) +{ + struct connection *conn; + int fd, socket[2], pid; + + fd = port_connection(port->fd, socket, &pid); + if 
(fd < 0) { + if (fd != -EAGAIN) + printk(KERN_ERR "port_accept : port_connection " + "returned %d\n", -fd); + goto out; + } + + conn = kmalloc(sizeof(*conn), GFP_ATOMIC); + if (conn == NULL) { + printk(KERN_ERR "port_accept : failed to allocate " + "connection\n"); + goto out_close; + } + *conn = ((struct connection) + { .list = LIST_HEAD_INIT(conn->list), + .fd = fd, + .socket = { socket[0], socket[1] }, + .telnetd_pid = pid, + .port = port }); + + if (um_request_irq(TELNETD_IRQ, socket[0], IRQ_READ, pipe_interrupt, + IRQF_DISABLED | IRQF_SHARED | IRQF_SAMPLE_RANDOM, + "telnetd", conn)) { + printk(KERN_ERR "port_accept : failed to get IRQ for " + "telnetd\n"); + goto out_free; + } + + if (atomic_read(&port->wait_count) == 0) { + os_write_file(fd, NO_WAITER_MSG, sizeof(NO_WAITER_MSG)); + printk(KERN_ERR "No one waiting for port\n"); + } + list_add(&conn->list, &port->pending); + return 1; + + out_free: + kfree(conn); + out_close: + os_close_file(fd); + os_kill_process(pid, 1); + out: + return 0; +} + +static DEFINE_MUTEX(ports_mutex); +static LIST_HEAD(ports); + +static void port_work_proc(struct work_struct *unused) +{ + struct port_list *port; + struct list_head *ele; + unsigned long flags; + + local_irq_save(flags); + list_for_each(ele, &ports) { + port = list_entry(ele, struct port_list, list); + if (!port->has_connection) + continue; + + reactivate_fd(port->fd, ACCEPT_IRQ); + while (port_accept(port)) + ; + port->has_connection = 0; + } + local_irq_restore(flags); +} + +DECLARE_WORK(port_work, port_work_proc); + +static irqreturn_t port_interrupt(int irq, void *data) +{ + struct port_list *port = data; + + port->has_connection = 1; + schedule_work(&port_work); + return IRQ_HANDLED; +} + +void *port_data(int port_num) +{ + struct list_head *ele; + struct port_list *port; + struct port_dev *dev = NULL; + int fd; + + mutex_lock(&ports_mutex); + list_for_each(ele, &ports) { + port = list_entry(ele, struct port_list, list); + if (port->port == port_num) + goto 
found; + } + port = kmalloc(sizeof(struct port_list), GFP_KERNEL); + if (port == NULL) { + printk(KERN_ERR "Allocation of port list failed\n"); + goto out; + } + + fd = port_listen_fd(port_num); + if (fd < 0) { + printk(KERN_ERR "binding to port %d failed, errno = %d\n", + port_num, -fd); + goto out_free; + } + + if (um_request_irq(ACCEPT_IRQ, fd, IRQ_READ, port_interrupt, + IRQF_DISABLED | IRQF_SHARED | IRQF_SAMPLE_RANDOM, + "port", port)) { + printk(KERN_ERR "Failed to get IRQ for port %d\n", port_num); + goto out_close; + } + + *port = ((struct port_list) + { .list = LIST_HEAD_INIT(port->list), + .wait_count = ATOMIC_INIT(0), + .has_connection = 0, + .port = port_num, + .fd = fd, + .pending = LIST_HEAD_INIT(port->pending), + .connections = LIST_HEAD_INIT(port->connections) }); + spin_lock_init(&port->lock); + init_completion(&port->done); + list_add(&port->list, &ports); + + found: + dev = kmalloc(sizeof(struct port_dev), GFP_KERNEL); + if (dev == NULL) { + printk(KERN_ERR "Allocation of port device entry failed\n"); + goto out; + } + + *dev = ((struct port_dev) { .port = port, + .helper_pid = -1, + .telnetd_pid = -1 }); + goto out; + + out_close: + os_close_file(fd); + out_free: + kfree(port); + out: + mutex_unlock(&ports_mutex); + return dev; +} + +int port_wait(void *data) +{ + struct port_dev *dev = data; + struct connection *conn; + struct port_list *port = dev->port; + int fd; + + atomic_inc(&port->wait_count); + while (1) { + fd = -ERESTARTSYS; + if (wait_for_completion_interruptible(&port->done)) + goto out; + + spin_lock(&port->lock); + + conn = list_entry(port->connections.next, struct connection, + list); + list_del(&conn->list); + spin_unlock(&port->lock); + + os_shutdown_socket(conn->socket[0], 1, 1); + os_close_file(conn->socket[0]); + os_shutdown_socket(conn->socket[1], 1, 1); + os_close_file(conn->socket[1]); + + /* This is done here because freeing an IRQ can't be done + * within the IRQ handler. 
So, pipe_interrupt always ups + * the semaphore regardless of whether it got a successful + * connection. Then we loop here throwing out failed + * connections until a good one is found. + */ + free_irq(TELNETD_IRQ, conn); + + if (conn->fd >= 0) + break; + os_close_file(conn->fd); + kfree(conn); + } + + fd = conn->fd; + dev->helper_pid = conn->helper_pid; + dev->telnetd_pid = conn->telnetd_pid; + kfree(conn); + out: + atomic_dec(&port->wait_count); + return fd; +} + +void port_remove_dev(void *d) +{ + struct port_dev *dev = d; + + if (dev->helper_pid != -1) + os_kill_process(dev->helper_pid, 0); + if (dev->telnetd_pid != -1) + os_kill_process(dev->telnetd_pid, 1); + dev->helper_pid = -1; + dev->telnetd_pid = -1; +} + +void port_kern_free(void *d) +{ + struct port_dev *dev = d; + + port_remove_dev(dev); + kfree(dev); +} + +static void free_port(void) +{ + struct list_head *ele; + struct port_list *port; + + list_for_each(ele, &ports) { + port = list_entry(ele, struct port_list, list); + free_irq_by_fd(port->fd); + os_close_file(port->fd); + } +} + +__uml_exitcall(free_port); diff --git a/arch/um/drivers/port_user.c b/arch/um/drivers/port_user.c new file mode 100644 index 0000000..b49bf56 --- /dev/null +++ b/arch/um/drivers/port_user.c @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <termios.h> +#include <unistd.h> +#include <netinet/in.h> +#include "chan_user.h" +#include "kern_constants.h" +#include "os.h" +#include "port.h" +#include "um_malloc.h" +#include "user.h" + +struct port_chan { + int raw; + struct termios tt; + void *kernel_data; + char dev[sizeof("32768\0")]; +}; + +static void *port_init(char *str, int device, const struct chan_opts *opts) +{ + struct port_chan *data; + void *kern_data; + char *end; + int port; + + if (*str != ':') { + printk(UM_KERN_ERR "port_init : channel type 'port' must " + 
"specify a port number\n"); + return NULL; + } + str++; + port = strtoul(str, &end, 0); + if ((*end != '\0') || (end == str)) { + printk(UM_KERN_ERR "port_init : couldn't parse port '%s'\n", + str); + return NULL; + } + + kern_data = port_data(port); + if (kern_data == NULL) + return NULL; + + data = uml_kmalloc(sizeof(*data), UM_GFP_KERNEL); + if (data == NULL) + goto err; + + *data = ((struct port_chan) { .raw = opts->raw, + .kernel_data = kern_data }); + sprintf(data->dev, "%d", port); + + return data; + err: + port_kern_free(kern_data); + return NULL; +} + +static void port_free(void *d) +{ + struct port_chan *data = d; + + port_kern_free(data->kernel_data); + kfree(data); +} + +static int port_open(int input, int output, int primary, void *d, + char **dev_out) +{ + struct port_chan *data = d; + int fd, err; + + fd = port_wait(data->kernel_data); + if ((fd >= 0) && data->raw) { + CATCH_EINTR(err = tcgetattr(fd, &data->tt)); + if (err) + return err; + + err = raw(fd); + if (err) + return err; + } + *dev_out = data->dev; + return fd; +} + +static void port_close(int fd, void *d) +{ + struct port_chan *data = d; + + port_remove_dev(data->kernel_data); + os_close_file(fd); +} + +const struct chan_ops port_ops = { + .type = "port", + .init = port_init, + .open = port_open, + .close = port_close, + .read = generic_read, + .write = generic_write, + .console_write = generic_console_write, + .window_size = generic_window_size, + .free = port_free, + .winch = 1, +}; + +int port_listen_fd(int port) +{ + struct sockaddr_in addr; + int fd, err, arg; + + fd = socket(PF_INET, SOCK_STREAM, 0); + if (fd == -1) + return -errno; + + arg = 1; + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &arg, sizeof(arg)) < 0) { + err = -errno; + goto out; + } + + addr.sin_family = AF_INET; + addr.sin_port = htons(port); + addr.sin_addr.s_addr = htonl(INADDR_ANY); + if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) { + err = -errno; + goto out; + } + + if (listen(fd, 1) < 0) { + err = 
-errno; + goto out; + } + + err = os_set_fd_block(fd, 0); + if (err < 0) + goto out; + + return fd; + out: + close(fd); + return err; +} + +struct port_pre_exec_data { + int sock_fd; + int pipe_fd; +}; + +static void port_pre_exec(void *arg) +{ + struct port_pre_exec_data *data = arg; + + dup2(data->sock_fd, 0); + dup2(data->sock_fd, 1); + dup2(data->sock_fd, 2); + close(data->sock_fd); + dup2(data->pipe_fd, 3); + shutdown(3, SHUT_RD); + close(data->pipe_fd); +} + +int port_connection(int fd, int *socket, int *pid_out) +{ + int new, err; + char *argv[] = { "/usr/sbin/in.telnetd", "-L", + "/usr/lib/uml/port-helper", NULL }; + struct port_pre_exec_data data; + + new = accept(fd, NULL, 0); + if (new < 0) + return -errno; + + err = os_pipe(socket, 0, 0); + if (err < 0) + goto out_close; + + data = ((struct port_pre_exec_data) + { .sock_fd = new, + .pipe_fd = socket[1] }); + + err = run_helper(port_pre_exec, &data, argv); + if (err < 0) + goto out_shutdown; + + *pid_out = err; + return new; + + out_shutdown: + shutdown(socket[0], SHUT_RDWR); + close(socket[0]); + shutdown(socket[1], SHUT_RDWR); + close(socket[1]); + out_close: + close(new); + return err; +} diff --git a/arch/um/drivers/pty.c b/arch/um/drivers/pty.c new file mode 100644 index 0000000..1113911 --- /dev/null +++ b/arch/um/drivers/pty.c @@ -0,0 +1,167 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <termios.h> +#include <sys/stat.h> +#include "chan_user.h" +#include "kern_constants.h" +#include "os.h" +#include "um_malloc.h" +#include "user.h" + +struct pty_chan { + void (*announce)(char *dev_name, int dev); + int dev; + int raw; + struct termios tt; + char dev_name[sizeof("/dev/pts/0123456\0")]; +}; + +static void *pty_chan_init(char *str, int device, const struct chan_opts *opts) +{ + struct pty_chan *data; + 
+ data = uml_kmalloc(sizeof(*data), UM_GFP_KERNEL); + if (data == NULL) + return NULL; + + *data = ((struct pty_chan) { .announce = opts->announce, + .dev = device, + .raw = opts->raw }); + return data; +} + +static int pts_open(int input, int output, int primary, void *d, + char **dev_out) +{ + struct pty_chan *data = d; + char *dev; + int fd, err; + + fd = get_pty(); + if (fd < 0) { + err = -errno; + printk(UM_KERN_ERR "open_pts : Failed to open pts\n"); + return err; + } + + if (data->raw) { + CATCH_EINTR(err = tcgetattr(fd, &data->tt)); + if (err) + goto out_close; + + err = raw(fd); + if (err) + goto out_close; + } + + dev = ptsname(fd); + sprintf(data->dev_name, "%s", dev); + *dev_out = data->dev_name; + + if (data->announce) + (*data->announce)(dev, data->dev); + + return fd; + +out_close: + close(fd); + return err; +} + +static int getmaster(char *line) +{ + struct stat buf; + char *pty, *bank, *cp; + int master, err; + + pty = &line[strlen("/dev/ptyp")]; + for (bank = "pqrs"; *bank; bank++) { + line[strlen("/dev/pty")] = *bank; + *pty = '0'; + /* Did we hit the end ? 
*/ + if ((stat(line, &buf) < 0) && (errno == ENOENT)) + break; + + for (cp = "0123456789abcdef"; *cp; cp++) { + *pty = *cp; + master = open(line, O_RDWR); + if (master >= 0) { + char *tp = &line[strlen("/dev/")]; + + /* verify slave side is usable */ + *tp = 't'; + err = access(line, R_OK | W_OK); + *tp = 'p'; + if (!err) + return master; + close(master); + } + } + } + + printk(UM_KERN_ERR "getmaster - no usable host pty devices\n"); + return -ENOENT; +} + +static int pty_open(int input, int output, int primary, void *d, + char **dev_out) +{ + struct pty_chan *data = d; + int fd, err; + char dev[sizeof("/dev/ptyxx\0")] = "/dev/ptyxx"; + + fd = getmaster(dev); + if (fd < 0) + return fd; + + if (data->raw) { + err = raw(fd); + if (err) { + close(fd); + return err; + } + } + + if (data->announce) + (*data->announce)(dev, data->dev); + + sprintf(data->dev_name, "%s", dev); + *dev_out = data->dev_name; + + return fd; +} + +const struct chan_ops pty_ops = { + .type = "pty", + .init = pty_chan_init, + .open = pty_open, + .close = generic_close, + .read = generic_read, + .write = generic_write, + .console_write = generic_console_write, + .window_size = generic_window_size, + .free = generic_free, + .winch = 0, +}; + +const struct chan_ops pts_ops = { + .type = "pts", + .init = pty_chan_init, + .open = pts_open, + .close = generic_close, + .read = generic_read, + .write = generic_write, + .console_write = generic_console_write, + .window_size = generic_window_size, + .free = generic_free, + .winch = 0, +}; diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c new file mode 100644 index 0000000..6eabb70 --- /dev/null +++ b/arch/um/drivers/random.c @@ -0,0 +1,171 @@ +/* Copyright (C) 2005 - 2008 Jeff Dike <jdike@{linux.intel,addtoit}.com> */ + +/* Much of this ripped from drivers/char/hw_random.c, see there for other + * copyright. 
+ * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + */ +#include <linux/sched.h> +#include <linux/smp_lock.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/interrupt.h> +#include <linux/miscdevice.h> +#include <linux/delay.h> +#include <asm/uaccess.h> +#include "irq_kern.h" +#include "os.h" + +/* + * core module and version information + */ +#define RNG_VERSION "1.0.0" +#define RNG_MODULE_NAME "hw_random" + +#define RNG_MISCDEV_MINOR 183 /* official */ + +/* Changed at init time, in the non-modular case, and at module load + * time, in the module case. Presumably, the module subsystem + * protects against a module being loaded twice at the same time. + */ +static int random_fd = -1; +static DECLARE_WAIT_QUEUE_HEAD(host_read_wait); + +static int rng_dev_open (struct inode *inode, struct file *filp) +{ + cycle_kernel_lock(); + + /* enforce read-only access to this chrdev */ + if ((filp->f_mode & FMODE_READ) == 0) + return -EINVAL; + if ((filp->f_mode & FMODE_WRITE) != 0) + return -EINVAL; + + return 0; +} + +static atomic_t host_sleep_count = ATOMIC_INIT(0); + +static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size, + loff_t *offp) +{ + u32 data; + int n, ret = 0, have_data; + + while (size) { + n = os_read_file(random_fd, &data, sizeof(data)); + if (n > 0) { + have_data = n; + while (have_data && size) { + if (put_user((u8) data, buf++)) { + ret = ret ? : -EFAULT; + break; + } + size--; + ret++; + have_data--; + data >>= 8; + } + } + else if (n == -EAGAIN) { + DECLARE_WAITQUEUE(wait, current); + + if (filp->f_flags & O_NONBLOCK) + return ret ? 
: -EAGAIN; + + atomic_inc(&host_sleep_count); + reactivate_fd(random_fd, RANDOM_IRQ); + add_sigio_fd(random_fd); + + add_wait_queue(&host_read_wait, &wait); + set_task_state(current, TASK_INTERRUPTIBLE); + + schedule(); + set_task_state(current, TASK_RUNNING); + remove_wait_queue(&host_read_wait, &wait); + + if (atomic_dec_and_test(&host_sleep_count)) { + ignore_sigio_fd(random_fd); + deactivate_fd(random_fd, RANDOM_IRQ); + } + } + else + return n; + + if (signal_pending (current)) + return ret ? : -ERESTARTSYS; + } + return ret; +} + +static const struct file_operations rng_chrdev_ops = { + .owner = THIS_MODULE, + .open = rng_dev_open, + .read = rng_dev_read, +}; + +/* rng_init shouldn't be called more than once at boot time */ +static struct miscdevice rng_miscdev = { + RNG_MISCDEV_MINOR, + RNG_MODULE_NAME, + &rng_chrdev_ops, +}; + +static irqreturn_t random_interrupt(int irq, void *data) +{ + wake_up(&host_read_wait); + + return IRQ_HANDLED; +} + +/* + * rng_init - initialize RNG module + */ +static int __init rng_init (void) +{ + int err; + + err = os_open_file("/dev/random", of_read(OPENFLAGS()), 0); + if (err < 0) + goto out; + + random_fd = err; + + err = um_request_irq(RANDOM_IRQ, random_fd, IRQ_READ, random_interrupt, + IRQF_DISABLED | IRQF_SAMPLE_RANDOM, "random", + NULL); + if (err) + goto err_out_cleanup_hw; + + sigio_broken(random_fd, 1); + + err = misc_register (&rng_miscdev); + if (err) { + printk (KERN_ERR RNG_MODULE_NAME ": misc device register " + "failed\n"); + goto err_out_cleanup_hw; + } +out: + return err; + +err_out_cleanup_hw: + os_close_file(random_fd); + random_fd = -1; + goto out; +} + +/* + * rng_cleanup - shutdown RNG module + */ +static void __exit rng_cleanup (void) +{ + os_close_file(random_fd); + misc_deregister (&rng_miscdev); +} + +module_init (rng_init); +module_exit (rng_cleanup); + +MODULE_DESCRIPTION("UML Host Random Number Generator (RNG) driver"); +MODULE_LICENSE("GPL"); diff --git a/arch/um/drivers/slip.h 
b/arch/um/drivers/slip.h new file mode 100644 index 0000000..c64f8c6 --- /dev/null +++ b/arch/um/drivers/slip.h @@ -0,0 +1,20 @@ +#ifndef __UM_SLIP_H +#define __UM_SLIP_H + +#include "slip_common.h" + +struct slip_data { + void *dev; + char name[sizeof("slnnnnn\0")]; + char *addr; + char *gate_addr; + int slave; + struct slip_proto slip; +}; + +extern const struct net_user_info slip_user_info; + +extern int slip_user_read(int fd, void *buf, int len, struct slip_data *pri); +extern int slip_user_write(int fd, void *buf, int len, struct slip_data *pri); + +#endif diff --git a/arch/um/drivers/slip_common.c b/arch/um/drivers/slip_common.c new file mode 100644 index 0000000..e89cfc6 --- /dev/null +++ b/arch/um/drivers/slip_common.c @@ -0,0 +1,54 @@ +#include <string.h> +#include "slip_common.h" +#include "net_user.h" + +/* + * Decode one SLIP frame into buf. Bytes left in slip->ibuf by an earlier + * call (slip->more) are decoded first; only when they do not complete a + * frame is more data read from fd. Returns the decoded frame length, 0 if + * no complete frame is available yet, or net_read()'s result when it is + * <= 0. A frame longer than len is truncated to len bytes so that buf is + * never overrun. + */ +int slip_proto_read(int fd, void *buf, int len, struct slip_proto *slip) +{ + int i, n, size, start; + + if(slip->more > 0){ + i = 0; + while(i < slip->more){ + size = slip_unesc(slip->ibuf[i++], slip->ibuf, + &slip->pos, &slip->esc); + if(size){ + /* ibuf is larger than the caller's buffer */ + if(size > len) + size = len; + memcpy(buf, slip->ibuf, size); + /* keep the undecoded tail for the next call */ + memmove(slip->ibuf, &slip->ibuf[i], + slip->more - i); + slip->more = slip->more - i; + return size; + } + } + slip->more = 0; + } + + n = net_read(fd, &slip->ibuf[slip->pos], + sizeof(slip->ibuf) - slip->pos); + if(n <= 0) + return n; + + start = slip->pos; + for(i = 0; i < n; i++){ + size = slip_unesc(slip->ibuf[start + i], slip->ibuf,&slip->pos, + &slip->esc); + if(size){ + /* ibuf is larger than the caller's buffer */ + if(size > len) + size = len; + memcpy(buf, slip->ibuf, size); + /* keep the undecoded tail for the next call */ + memmove(slip->ibuf, &slip->ibuf[start+i+1], + n - (i + 1)); + slip->more = n - (i + 1); + return size; + } + } + return 0; +} + +int slip_proto_write(int fd, void *buf, int len, struct slip_proto *slip) +{ + int actual, n; + + actual = slip_esc(buf, slip->obuf, len); + n = net_write(fd, slip->obuf, actual); + if(n < 0) + return n; + else return len; +} diff --git a/arch/um/drivers/slip_common.h b/arch/um/drivers/slip_common.h new file mode 100644 index 0000000..d574e0a --- 
/dev/null +++ b/arch/um/drivers/slip_common.h @@ -0,0 +1,105 @@ +#ifndef __UM_SLIP_COMMON_H +#define __UM_SLIP_COMMON_H + +#define BUF_SIZE 1500 + /* two bytes each for a (pathological) max packet of escaped chars + * + * terminating END char + initial END char */ +#define ENC_BUF_SIZE (2 * BUF_SIZE + 2) + +/* SLIP protocol characters. */ +#define SLIP_END 0300 /* indicates end of frame */ +#define SLIP_ESC 0333 /* indicates byte stuffing */ +#define SLIP_ESC_END 0334 /* ESC ESC_END means END 'data' */ +#define SLIP_ESC_ESC 0335 /* ESC ESC_ESC means ESC 'data' */ + +static inline int slip_unesc(unsigned char c, unsigned char *buf, int *pos, + int *esc) +{ + int ret; + + switch(c){ + case SLIP_END: + *esc = 0; + ret=*pos; + *pos=0; + return(ret); + case SLIP_ESC: + *esc = 1; + return(0); + case SLIP_ESC_ESC: + if(*esc){ + *esc = 0; + c = SLIP_ESC; + } + break; + case SLIP_ESC_END: + if(*esc){ + *esc = 0; + c = SLIP_END; + } + break; + } + buf[(*pos)++] = c; + return(0); +} + +static inline int slip_esc(unsigned char *s, unsigned char *d, int len) +{ + unsigned char *ptr = d; + unsigned char c; + + /* + * Send an initial END character to flush out any + * data that may have accumulated in the receiver + * due to line noise. + */ + + *ptr++ = SLIP_END; + + /* + * For each byte in the packet, send the appropriate + * character sequence, according to the SLIP protocol. 
+ */ + + while (len-- > 0) { + switch(c = *s++) { + case SLIP_END: + *ptr++ = SLIP_ESC; + *ptr++ = SLIP_ESC_END; + break; + case SLIP_ESC: + *ptr++ = SLIP_ESC; + *ptr++ = SLIP_ESC_ESC; + break; + default: + *ptr++ = c; + break; + } + } + *ptr++ = SLIP_END; + return (ptr - d); +} + +struct slip_proto { + unsigned char ibuf[ENC_BUF_SIZE]; + unsigned char obuf[ENC_BUF_SIZE]; + int more; /* more data: do not read fd until ibuf has been drained */ + int pos; + int esc; +}; + +static inline void slip_proto_init(struct slip_proto * slip) +{ + memset(slip->ibuf, 0, sizeof(slip->ibuf)); + memset(slip->obuf, 0, sizeof(slip->obuf)); + slip->more = 0; + slip->pos = 0; + slip->esc = 0; +} + +extern int slip_proto_read(int fd, void *buf, int len, + struct slip_proto *slip); +extern int slip_proto_write(int fd, void *buf, int len, + struct slip_proto *slip); + +#endif diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c new file mode 100644 index 0000000..d19faec --- /dev/null +++ b/arch/um/drivers/slip_kern.c @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL. 
+ */ + +#include <linux/if_arp.h> +#include <linux/init.h> +#include <linux/netdevice.h> +#include "net_kern.h" +#include "slip.h" + +struct slip_init { + char *gate_addr; +}; + +static void slip_init(struct net_device *dev, void *data) +{ + struct uml_net_private *private; + struct slip_data *spri; + struct slip_init *init = data; + + private = dev->priv; + spri = (struct slip_data *) private->user; + + memset(spri->name, 0, sizeof(spri->name)); + spri->addr = NULL; + spri->gate_addr = init->gate_addr; + spri->slave = -1; + spri->dev = dev; + + slip_proto_init(&spri->slip); + + dev->init = NULL; + dev->hard_header_len = 0; + dev->header_ops = NULL; + dev->addr_len = 0; + dev->type = ARPHRD_SLIP; + dev->tx_queue_len = 256; + dev->flags = IFF_NOARP; + printk("SLIP backend - SLIP IP = %s\n", spri->gate_addr); +} + +static unsigned short slip_protocol(struct sk_buff *skbuff) +{ + return htons(ETH_P_IP); +} + +static int slip_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return slip_user_read(fd, skb_mac_header(skb), skb->dev->mtu, + (struct slip_data *) &lp->user); +} + +static int slip_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return slip_user_write(fd, skb->data, skb->len, + (struct slip_data *) &lp->user); +} + +static const struct net_kern_info slip_kern_info = { + .init = slip_init, + .protocol = slip_protocol, + .read = slip_read, + .write = slip_write, +}; + +static int slip_setup(char *str, char **mac_out, void *data) +{ + struct slip_init *init = data; + + *init = ((struct slip_init) { .gate_addr = NULL }); + + if (str[0] != '\0') + init->gate_addr = str; + return 1; +} + +static struct transport slip_transport = { + .list = LIST_HEAD_INIT(slip_transport.list), + .name = "slip", + .setup = slip_setup, + .user = &slip_user_info, + .kern = &slip_kern_info, + .private_size = sizeof(struct slip_data), + .setup_size = sizeof(struct slip_init), +}; + +static int register_slip(void) +{ + 
register_transport(&slip_transport); + return 0; +} + +late_initcall(register_slip); diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c new file mode 100644 index 0000000..a1c2d2c --- /dev/null +++ b/arch/um/drivers/slip_user.c @@ -0,0 +1,254 @@ +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <sys/termios.h> +#include <sys/wait.h> +#include "kern_constants.h" +#include "net_user.h" +#include "os.h" +#include "slip.h" +#include "um_malloc.h" +#include "user.h" + +static int slip_user_init(void *data, void *dev) +{ + struct slip_data *pri = data; + + pri->dev = dev; + return 0; +} + +static int set_up_tty(int fd) +{ + int i; + struct termios tios; + + if (tcgetattr(fd, &tios) < 0) { + printk(UM_KERN_ERR "could not get initial terminal " + "attributes\n"); + return -1; + } + + tios.c_cflag = CS8 | CREAD | HUPCL | CLOCAL; + tios.c_iflag = IGNBRK | IGNPAR; + tios.c_oflag = 0; + tios.c_lflag = 0; + for (i = 0; i < NCCS; i++) + tios.c_cc[i] = 0; + tios.c_cc[VMIN] = 1; + tios.c_cc[VTIME] = 0; + + cfsetospeed(&tios, B38400); + cfsetispeed(&tios, B38400); + + if (tcsetattr(fd, TCSAFLUSH, &tios) < 0) { + printk(UM_KERN_ERR "failed to set terminal attributes\n"); + return -1; + } + return 0; +} + +struct slip_pre_exec_data { + int stdin; + int stdout; + int close_me; +}; + +static void slip_pre_exec(void *arg) +{ + struct slip_pre_exec_data *data = arg; + + if (data->stdin >= 0) + dup2(data->stdin, 0); + dup2(data->stdout, 1); + if (data->close_me >= 0) + close(data->close_me); +} + +static int slip_tramp(char **argv, int fd) +{ + struct slip_pre_exec_data pe_data; + char *output; + int pid, fds[2], err, output_len; + + err = os_pipe(fds, 1, 0); + if (err < 0) { + printk(UM_KERN_ERR "slip_tramp : pipe failed, err = %d\n", + -err); + goto out; + } + + err = 0; + 
pe_data.stdin = fd; + pe_data.stdout = fds[1]; + pe_data.close_me = fds[0]; + err = run_helper(slip_pre_exec, &pe_data, argv); + if (err < 0) + goto out_close; + pid = err; + + output_len = UM_KERN_PAGE_SIZE; + output = uml_kmalloc(output_len, UM_GFP_KERNEL); + if (output == NULL) { + printk(UM_KERN_ERR "slip_tramp : failed to allocate output " + "buffer\n"); + os_kill_process(pid, 1); + err = -ENOMEM; + /* nothing to free, but both pipe ends are still open */ + goto out_close; + } + + close(fds[1]); + read_output(fds[0], output, output_len); + printk("%s", output); + + err = helper_wait(pid); + close(fds[0]); + + kfree(output); + return err; + +out_close: + close(fds[0]); + close(fds[1]); +out: + return err; +} + +/* + * Open the host side of the SLIP link: get a pty master, open its slave + * and configure it with set_up_tty(), then either run the uml_net helper + * (when a gateway address was configured) or call os_set_slip() on the + * slave. Returns the pty master fd on success or a negative value on + * failure; every failure path closes the descriptors opened so far. + */ +static int slip_open(void *data) +{ + struct slip_data *pri = data; + char version_buf[sizeof("nnnnn\0")]; + char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; + char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf, + NULL }; + int sfd, mfd, err; + + err = get_pty(); + if (err < 0) { + printk(UM_KERN_ERR "slip-open : Failed to open pty, err = %d\n", + -err); + goto out; + } + mfd = err; + + err = open(ptsname(mfd), O_RDWR, 0); + if (err < 0) { + /* open() only returns -1; the real reason is in errno */ + err = -errno; + printk(UM_KERN_ERR "Couldn't open tty for slip line, " + "err = %d\n", -err); + goto out_close; + } + sfd = err; + + /* + * err must carry the failure: it still holds sfd at this point, and + * returning that would look like a valid fd to the caller. + */ + err = set_up_tty(sfd); + if (err) + goto out_close2; + + pri->slave = sfd; + pri->slip.pos = 0; + pri->slip.esc = 0; + if (pri->gate_addr != NULL) { + /* gate_addr comes from the setup string; reject what gate_buf cannot hold */ + if (strlen(pri->gate_addr) >= sizeof(gate_buf)) { + printk(UM_KERN_ERR "slip_open : gateway address '%s' " + "is too long\n", pri->gate_addr); + err = -EINVAL; + goto out_close2; + } + sprintf(version_buf, "%d", UML_NET_VERSION); + strcpy(gate_buf, pri->gate_addr); + + err = slip_tramp(argv, sfd); + + if (err < 0) { + printk(UM_KERN_ERR "slip_tramp failed - err = %d\n", + -err); + goto out_close2; + } + err = os_get_ifname(pri->slave, pri->name); + if (err < 0) { + printk(UM_KERN_ERR "get_ifname failed, err = %d\n", + -err); + goto out_close2; + } + iter_addresses(pri->dev, open_addr, pri->name); + } + else { + err = os_set_slip(sfd); + if (err < 0) { + printk(UM_KERN_ERR "Failed to set slip discipline " + "encapsulation - err = %d\n", -err); + goto out_close2; + } + } + return mfd; 
+out_close2: + close(sfd); +out_close: + close(mfd); +out: + return err; +} + +static void slip_close(int fd, void *data) +{ + struct slip_data *pri = data; + char version_buf[sizeof("nnnnn\0")]; + char *argv[] = { "uml_net", version_buf, "slip", "down", pri->name, + NULL }; + int err; + + if (pri->gate_addr != NULL) + iter_addresses(pri->dev, close_addr, pri->name); + + sprintf(version_buf, "%d", UML_NET_VERSION); + + err = slip_tramp(argv, pri->slave); + + if (err != 0) + printk(UM_KERN_ERR "slip_tramp failed - errno = %d\n", -err); + close(fd); + close(pri->slave); + pri->slave = -1; +} + +int slip_user_read(int fd, void *buf, int len, struct slip_data *pri) +{ + return slip_proto_read(fd, buf, len, &pri->slip); +} + +int slip_user_write(int fd, void *buf, int len, struct slip_data *pri) +{ + return slip_proto_write(fd, buf, len, &pri->slip); +} + +static void slip_add_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct slip_data *pri = data; + + if (pri->slave < 0) + return; + open_addr(addr, netmask, pri->name); +} + +static void slip_del_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct slip_data *pri = data; + + if (pri->slave < 0) + return; + close_addr(addr, netmask, pri->name); +} + +const struct net_user_info slip_user_info = { + .init = slip_user_init, + .open = slip_open, + .close = slip_close, + .remove = NULL, + .add_address = slip_add_addr, + .delete_address = slip_del_addr, + .mtu = BUF_SIZE, + .max_packet = BUF_SIZE, +}; diff --git a/arch/um/drivers/slirp.h b/arch/um/drivers/slirp.h new file mode 100644 index 0000000..89ccf83 --- /dev/null +++ b/arch/um/drivers/slirp.h @@ -0,0 +1,33 @@ +#ifndef __UM_SLIRP_H +#define __UM_SLIRP_H + +#include "slip_common.h" + +#define SLIRP_MAX_ARGS 100 +/* + * XXX this next definition is here because I don't understand why this + * initializer doesn't work in slirp_kern.c: + * + * argv : { init->argv[ 0 ... 
SLIRP_MAX_ARGS-1 ] }, + * + * or why I can't typecast like this: + * + * argv : (char* [SLIRP_MAX_ARGS])(init->argv), + */ +struct arg_list_dummy_wrapper { char *argv[SLIRP_MAX_ARGS]; }; + +struct slirp_data { + void *dev; + struct arg_list_dummy_wrapper argw; + int pid; + int slave; + struct slip_proto slip; +}; + +extern const struct net_user_info slirp_user_info; + +extern int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri); +extern int slirp_user_write(int fd, void *buf, int len, + struct slirp_data *pri); + +#endif diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c new file mode 100644 index 0000000..d987af2 --- /dev/null +++ b/arch/um/drivers/slirp_kern.c @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL. + */ + +#include <linux/if_arp.h> +#include "linux/init.h" +#include <linux/netdevice.h> +#include <linux/string.h> +#include "net_kern.h" +#include "net_user.h" +#include "slirp.h" + +struct slirp_init { + struct arg_list_dummy_wrapper argw; /* XXX should be simpler... 
*/ +}; + +void slirp_init(struct net_device *dev, void *data) +{ + struct uml_net_private *private; + struct slirp_data *spri; + struct slirp_init *init = data; + int i; + + private = dev->priv; + spri = (struct slirp_data *) private->user; + + spri->argw = init->argw; + spri->pid = -1; + spri->slave = -1; + spri->dev = dev; + + slip_proto_init(&spri->slip); + + dev->init = NULL; + dev->hard_header_len = 0; + dev->header_ops = NULL; + dev->addr_len = 0; + dev->type = ARPHRD_SLIP; + dev->tx_queue_len = 256; + dev->flags = IFF_NOARP; + printk("SLIRP backend - command line:"); + for (i = 0; spri->argw.argv[i] != NULL; i++) + printk(" '%s'",spri->argw.argv[i]); + printk("\n"); +} + +static unsigned short slirp_protocol(struct sk_buff *skbuff) +{ + return htons(ETH_P_IP); +} + +static int slirp_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return slirp_user_read(fd, skb_mac_header(skb), skb->dev->mtu, + (struct slirp_data *) &lp->user); +} + +static int slirp_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return slirp_user_write(fd, skb->data, skb->len, + (struct slirp_data *) &lp->user); +} + +const struct net_kern_info slirp_kern_info = { + .init = slirp_init, + .protocol = slirp_protocol, + .read = slirp_read, + .write = slirp_write, +}; + +static int slirp_setup(char *str, char **mac_out, void *data) +{ + struct slirp_init *init = data; + int i=0; + + *init = ((struct slirp_init) { .argw = { { "slirp", NULL } } }); + + str = split_if_spec(str, mac_out, NULL); + + if (str == NULL) /* no command line given after MAC addr */ + return 1; + + do { + if (i >= SLIRP_MAX_ARGS - 1) { + printk(KERN_WARNING "slirp_setup: truncating slirp " + "arguments\n"); + break; + } + init->argw.argv[i++] = str; + while(*str && *str!=',') { + if (*str == '_') + *str=' '; + str++; + } + if (*str != ',') + break; + *str++ = '\0'; + } while (1); + + init->argw.argv[i] = NULL; + return 1; +} + +static struct transport slirp_transport = { + .list = 
LIST_HEAD_INIT(slirp_transport.list), + .name = "slirp", + .setup = slirp_setup, + .user = &slirp_user_info, + .kern = &slirp_kern_info, + .private_size = sizeof(struct slirp_data), + .setup_size = sizeof(struct slirp_init), +}; + +static int register_slirp(void) +{ + register_transport(&slirp_transport); + return 0; +} + +late_initcall(register_slirp); diff --git a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c new file mode 100644 index 0000000..a0ada8f --- /dev/null +++ b/arch/um/drivers/slirp_user.c @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL. + */ + +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <sys/wait.h> +#include "kern_constants.h" +#include "net_user.h" +#include "os.h" +#include "slirp.h" +#include "user.h" + +static int slirp_user_init(void *data, void *dev) +{ + struct slirp_data *pri = data; + + pri->dev = dev; + return 0; +} + +struct slirp_pre_exec_data { + int stdin; + int stdout; +}; + +static void slirp_pre_exec(void *arg) +{ + struct slirp_pre_exec_data *data = arg; + + if (data->stdin != -1) + dup2(data->stdin, 0); + if (data->stdout != -1) + dup2(data->stdout, 1); +} + +static int slirp_tramp(char **argv, int fd) +{ + struct slirp_pre_exec_data pe_data; + int pid; + + pe_data.stdin = fd; + pe_data.stdout = fd; + pid = run_helper(slirp_pre_exec, &pe_data, argv); + + return pid; +} + +static int slirp_open(void *data) +{ + struct slirp_data *pri = data; + int fds[2], pid, err; + + err = os_pipe(fds, 1, 1); + if (err) + return err; + + err = slirp_tramp(pri->argw.argv, fds[1]); + if (err < 0) { + printk(UM_KERN_ERR "slirp_tramp failed - errno = %d\n", -err); + goto out; + } + pid = err; + + pri->slave = fds[1]; + pri->slip.pos = 0; + pri->slip.esc = 0; + pri->pid = err; + + return fds[0]; +out: + close(fds[0]); + close(fds[1]); + return err; +} + +static void slirp_close(int fd, void *data) +{ + struct slirp_data *pri = data; + int err; 
+ + close(fd); + close(pri->slave); + + pri->slave = -1; + + if (pri->pid<1) { + printk(UM_KERN_ERR "slirp_close: no child process to shut " + "down\n"); + return; + } + +#if 0 + if (kill(pri->pid, SIGHUP)<0) { + printk(UM_KERN_ERR "slirp_close: sending hangup to %d failed " + "(%d)\n", pri->pid, errno); + } +#endif + err = helper_wait(pri->pid); + if (err < 0) + return; + + pri->pid = -1; +} + +int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri) +{ + return slip_proto_read(fd, buf, len, &pri->slip); +} + +int slirp_user_write(int fd, void *buf, int len, struct slirp_data *pri) +{ + return slip_proto_write(fd, buf, len, &pri->slip); +} + +const struct net_user_info slirp_user_info = { + .init = slirp_user_init, + .open = slirp_open, + .close = slirp_close, + .remove = NULL, + .add_address = NULL, + .delete_address = NULL, + .mtu = BUF_SIZE, + .max_packet = BUF_SIZE, +}; diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c new file mode 100644 index 0000000..f1786e6 --- /dev/null +++ b/arch/um/drivers/ssl.c @@ -0,0 +1,231 @@ +/* + * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "linux/fs.h" +#include "linux/tty.h" +#include "linux/tty_driver.h" +#include "linux/major.h" +#include "linux/mm.h" +#include "linux/init.h" +#include "linux/console.h" +#include "asm/termbits.h" +#include "asm/irq.h" +#include "line.h" +#include "ssl.h" +#include "chan_kern.h" +#include "kern.h" +#include "init.h" +#include "irq_user.h" +#include "mconsole_kern.h" + +static const int ssl_version = 1; + +/* Referenced only by tty_driver below - presumably it's locked correctly + * by the tty driver. 
+ */ + +static struct tty_driver *ssl_driver; + +#define NR_PORTS 64 + +static void ssl_announce(char *dev_name, int dev) +{ + printk(KERN_INFO "Serial line %d assigned device '%s'\n", dev, + dev_name); +} + +/* Almost const, except that xterm_title may be changed in an initcall */ +static struct chan_opts opts = { + .announce = ssl_announce, + .xterm_title = "Serial Line #%d", + .raw = 1, +}; + +static int ssl_config(char *str, char **error_out); +static int ssl_get_config(char *dev, char *str, int size, char **error_out); +static int ssl_remove(int n, char **error_out); + + +/* Const, except for .mc.list */ +static struct line_driver driver = { + .name = "UML serial line", + .device_name = "ttyS", + .major = TTY_MAJOR, + .minor_start = 64, + .type = TTY_DRIVER_TYPE_SERIAL, + .subtype = 0, + .read_irq = SSL_IRQ, + .read_irq_name = "ssl", + .write_irq = SSL_WRITE_IRQ, + .write_irq_name = "ssl-write", + .mc = { + .list = LIST_HEAD_INIT(driver.mc.list), + .name = "ssl", + .config = ssl_config, + .get_config = ssl_get_config, + .id = line_id, + .remove = ssl_remove, + }, +}; + +/* The array is initialized by line_init, at initcall time. The + * elements are locked individually as needed. + */ +static struct line serial_lines[NR_PORTS] = + { [0 ... 
NR_PORTS - 1] = LINE_INIT(CONFIG_SSL_CHAN, &driver) }; + +static int ssl_config(char *str, char **error_out) +{ + return line_config(serial_lines, ARRAY_SIZE(serial_lines), str, &opts, + error_out); +} + +static int ssl_get_config(char *dev, char *str, int size, char **error_out) +{ + return line_get_config(dev, serial_lines, ARRAY_SIZE(serial_lines), str, + size, error_out); +} + +static int ssl_remove(int n, char **error_out) +{ + return line_remove(serial_lines, ARRAY_SIZE(serial_lines), n, + error_out); +} + +static int ssl_open(struct tty_struct *tty, struct file *filp) +{ + int err = line_open(serial_lines, tty); + + if (err) + printk(KERN_ERR "Failed to open serial line %d, err = %d\n", + tty->index, err); + + return err; +} + +#if 0 +static void ssl_flush_buffer(struct tty_struct *tty) +{ + return; +} + +static void ssl_stop(struct tty_struct *tty) +{ + printk(KERN_ERR "Someone should implement ssl_stop\n"); +} + +static void ssl_start(struct tty_struct *tty) +{ + printk(KERN_ERR "Someone should implement ssl_start\n"); +} + +void ssl_hangup(struct tty_struct *tty) +{ +} +#endif + +static const struct tty_operations ssl_ops = { + .open = ssl_open, + .close = line_close, + .write = line_write, + .put_char = line_put_char, + .write_room = line_write_room, + .chars_in_buffer = line_chars_in_buffer, + .flush_buffer = line_flush_buffer, + .flush_chars = line_flush_chars, + .set_termios = line_set_termios, + .ioctl = line_ioctl, + .throttle = line_throttle, + .unthrottle = line_unthrottle, +#if 0 + .stop = ssl_stop, + .start = ssl_start, + .hangup = ssl_hangup, +#endif +}; + +/* Changed by ssl_init and referenced by ssl_exit, which are both serialized + * by being an initcall and exitcall, respectively. 
+ */ +static int ssl_init_done = 0; + +static void ssl_console_write(struct console *c, const char *string, + unsigned len) +{ + struct line *line = &serial_lines[c->index]; + unsigned long flags; + + spin_lock_irqsave(&line->lock, flags); + console_write_chan(&line->chan_list, string, len); + spin_unlock_irqrestore(&line->lock, flags); +} + +static struct tty_driver *ssl_console_device(struct console *c, int *index) +{ + *index = c->index; + return ssl_driver; +} + +static int ssl_console_setup(struct console *co, char *options) +{ + struct line *line = &serial_lines[co->index]; + + return console_open_chan(line, co); +} + +/* No locking for register_console call - relies on single-threaded initcalls */ +static struct console ssl_cons = { + .name = "ttyS", + .write = ssl_console_write, + .device = ssl_console_device, + .setup = ssl_console_setup, + .flags = CON_PRINTBUFFER|CON_ANYTIME, + .index = -1, +}; + +static int ssl_init(void) +{ + char *new_title; + + printk(KERN_INFO "Initializing software serial port version %d\n", + ssl_version); + ssl_driver = register_lines(&driver, &ssl_ops, serial_lines, + ARRAY_SIZE(serial_lines)); + + new_title = add_xterm_umid(opts.xterm_title); + if (new_title != NULL) + opts.xterm_title = new_title; + + lines_init(serial_lines, ARRAY_SIZE(serial_lines), &opts); + + ssl_init_done = 1; + register_console(&ssl_cons); + return 0; +} +late_initcall(ssl_init); + +static void ssl_exit(void) +{ + if (!ssl_init_done) + return; + close_lines(serial_lines, ARRAY_SIZE(serial_lines)); +} +__uml_exitcall(ssl_exit); + +static int ssl_chan_setup(char *str) +{ + char *error; + int ret; + + ret = line_setup(serial_lines, ARRAY_SIZE(serial_lines), str, &error); + if(ret < 0) + printk(KERN_ERR "Failed to set up serial line with " + "configuration string \"%s\" : %s\n", str, error); + + return 1; +} + +__setup("ssl", ssl_chan_setup); +__channel_help(ssl_chan_setup, "ssl"); diff --git a/arch/um/drivers/ssl.h b/arch/um/drivers/ssl.h new file mode 
100644 index 0000000..98412aa --- /dev/null +++ b/arch/um/drivers/ssl.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SSL_H__ +#define __SSL_H__ + +extern int ssl_read(int fd, int line); +extern void ssl_receive_char(int line, char ch); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/drivers/stderr_console.c b/arch/um/drivers/stderr_console.c new file mode 100644 index 0000000..d07a97f --- /dev/null +++ b/arch/um/drivers/stderr_console.c @@ -0,0 +1,62 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/console.h> + +#include "chan_user.h" + +/* ----------------------------------------------------------------------------- */ +/* trivial console driver -- simply dump everything to stderr */ + +/* + * Don't register by default -- as this registers very early in the + * boot process it becomes the default console. + * + * Initialized at init time. 
+ */ +static int use_stderr_console = 0; + +static void stderr_console_write(struct console *console, const char *string, + unsigned len) +{ + generic_write(2 /* stderr */, string, len, NULL); +} + +static struct console stderr_console = { + .name = "stderr", + .write = stderr_console_write, + .flags = CON_PRINTBUFFER, +}; + +static int __init stderr_console_init(void) +{ + if (use_stderr_console) + register_console(&stderr_console); + return 0; +} +console_initcall(stderr_console_init); + +static int stderr_setup(char *str) +{ + if (!str) + return 0; + use_stderr_console = simple_strtoul(str,&str,0); + return 1; +} +__setup("stderr=", stderr_setup); + +/* The previous behavior of not unregistering led to /dev/console being + * impossible to open. My FC5 filesystem started having init die, and the + * system panicing because of this. Unregistering causes the real + * console to become the default console, and /dev/console can then be + * opened. Making this an initcall makes this happen late enough that + * there is no added value in dumping everything to stderr, and the + * normal console is good enough to show you all available output. 
+ */ +static int __init unregister_stderr(void) +{ + unregister_console(&stderr_console); + + return 0; +} + +__initcall(unregister_stderr); diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c new file mode 100644 index 0000000..49266f6 --- /dev/null +++ b/arch/um/drivers/stdio_console.c @@ -0,0 +1,204 @@ +/* + * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include "linux/posix_types.h" +#include "linux/tty.h" +#include "linux/tty_flip.h" +#include "linux/types.h" +#include "linux/major.h" +#include "linux/kdev_t.h" +#include "linux/console.h" +#include "linux/string.h" +#include "linux/sched.h" +#include "linux/list.h" +#include "linux/init.h" +#include "linux/interrupt.h" +#include "linux/slab.h" +#include "linux/hardirq.h" +#include "asm/current.h" +#include "asm/irq.h" +#include "stdio_console.h" +#include "line.h" +#include "chan_kern.h" +#include "irq_user.h" +#include "mconsole_kern.h" +#include "init.h" + +#define MAX_TTYS (16) + +/* Referenced only by tty_driver below - presumably it's locked correctly + * by the tty driver. 
+ */ + +static struct tty_driver *console_driver; + +static void stdio_announce(char *dev_name, int dev) +{ + printk(KERN_INFO "Virtual console %d assigned device '%s'\n", dev, + dev_name); +} + +/* Almost const, except that xterm_title may be changed in an initcall */ +static struct chan_opts opts = { + .announce = stdio_announce, + .xterm_title = "Virtual Console #%d", + .raw = 1, +}; + +static int con_config(char *str, char **error_out); +static int con_get_config(char *dev, char *str, int size, char **error_out); +static int con_remove(int n, char **con_remove); + + +/* Const, except for .mc.list */ +static struct line_driver driver = { + .name = "UML console", + .device_name = "tty", + .major = TTY_MAJOR, + .minor_start = 0, + .type = TTY_DRIVER_TYPE_CONSOLE, + .subtype = SYSTEM_TYPE_CONSOLE, + .read_irq = CONSOLE_IRQ, + .read_irq_name = "console", + .write_irq = CONSOLE_WRITE_IRQ, + .write_irq_name = "console-write", + .mc = { + .list = LIST_HEAD_INIT(driver.mc.list), + .name = "con", + .config = con_config, + .get_config = con_get_config, + .id = line_id, + .remove = con_remove, + }, +}; + +/* The array is initialized by line_init, at initcall time. The + * elements are locked individually as needed. + */ +static struct line vts[MAX_TTYS] = { LINE_INIT(CONFIG_CON_ZERO_CHAN, &driver), + [ 1 ... 
MAX_TTYS - 1 ] = + LINE_INIT(CONFIG_CON_CHAN, &driver) }; + +static int con_config(char *str, char **error_out) +{ + return line_config(vts, ARRAY_SIZE(vts), str, &opts, error_out); +} + +static int con_get_config(char *dev, char *str, int size, char **error_out) +{ + return line_get_config(dev, vts, ARRAY_SIZE(vts), str, size, error_out); +} + +static int con_remove(int n, char **error_out) +{ + return line_remove(vts, ARRAY_SIZE(vts), n, error_out); +} + +static int con_open(struct tty_struct *tty, struct file *filp) +{ + int err = line_open(vts, tty); + if (err) + printk(KERN_ERR "Failed to open console %d, err = %d\n", + tty->index, err); + + return err; +} + +/* Set in an initcall, checked in an exitcall */ +static int con_init_done = 0; + +static const struct tty_operations console_ops = { + .open = con_open, + .close = line_close, + .write = line_write, + .put_char = line_put_char, + .write_room = line_write_room, + .chars_in_buffer = line_chars_in_buffer, + .flush_buffer = line_flush_buffer, + .flush_chars = line_flush_chars, + .set_termios = line_set_termios, + .ioctl = line_ioctl, + .throttle = line_throttle, + .unthrottle = line_unthrottle, +}; + +static void uml_console_write(struct console *console, const char *string, + unsigned len) +{ + struct line *line = &vts[console->index]; + unsigned long flags; + + spin_lock_irqsave(&line->lock, flags); + console_write_chan(&line->chan_list, string, len); + spin_unlock_irqrestore(&line->lock, flags); +} + +static struct tty_driver *uml_console_device(struct console *c, int *index) +{ + *index = c->index; + return console_driver; +} + +static int uml_console_setup(struct console *co, char *options) +{ + struct line *line = &vts[co->index]; + + return console_open_chan(line, co); +} + +/* No locking for register_console call - relies on single-threaded initcalls */ +static struct console stdiocons = { + .name = "tty", + .write = uml_console_write, + .device = uml_console_device, + .setup = uml_console_setup, + 
.flags = CON_PRINTBUFFER|CON_ANYTIME, + .index = -1, +}; + +static int stdio_init(void) +{ + char *new_title; + + console_driver = register_lines(&driver, &console_ops, vts, + ARRAY_SIZE(vts)); + if (console_driver == NULL) + return -1; + printk(KERN_INFO "Initialized stdio console driver\n"); + + new_title = add_xterm_umid(opts.xterm_title); + if(new_title != NULL) + opts.xterm_title = new_title; + + lines_init(vts, ARRAY_SIZE(vts), &opts); + + con_init_done = 1; + register_console(&stdiocons); + return 0; +} +late_initcall(stdio_init); + +static void console_exit(void) +{ + if (!con_init_done) + return; + close_lines(vts, ARRAY_SIZE(vts)); +} +__uml_exitcall(console_exit); + +static int console_chan_setup(char *str) +{ + char *error; + int ret; + + ret = line_setup(vts, ARRAY_SIZE(vts), str, &error); + if(ret < 0) + printk(KERN_ERR "Failed to set up console with " + "configuration string \"%s\" : %s\n", str, error); + + return 1; +} +__setup("con", console_chan_setup); +__channel_help(console_chan_setup, "con"); diff --git a/arch/um/drivers/stdio_console.h b/arch/um/drivers/stdio_console.h new file mode 100644 index 0000000..505a3d5 --- /dev/null +++ b/arch/um/drivers/stdio_console.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __STDIO_CONSOLE_H +#define __STDIO_CONSOLE_H + +extern void save_console_flags(void); +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/drivers/tty.c b/arch/um/drivers/tty.c new file mode 100644 index 0000000..495858a --- /dev/null +++ b/arch/um/drivers/tty.c @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) + * Licensed under the GPL + */ + +#include <errno.h> +#include <fcntl.h> +#include <termios.h> +#include "chan_user.h" +#include "kern_constants.h" +#include "os.h" +#include "um_malloc.h" +#include "user.h" + +struct tty_chan { + char *dev; + int raw; + struct termios tt; +}; + +static void *tty_chan_init(char *str, int device, const struct chan_opts *opts) +{ + struct tty_chan *data; + + if (*str != ':') { + printk(UM_KERN_ERR "tty_init : channel type 'tty' must specify " + "a device\n"); + return NULL; + } + str++; + + data = uml_kmalloc(sizeof(*data), UM_GFP_KERNEL); + if (data == NULL) + return NULL; + *data = ((struct tty_chan) { .dev = str, + .raw = opts->raw }); + + return data; +} + +static int tty_open(int input, int output, int primary, void *d, + char **dev_out) +{ + struct tty_chan *data = d; + int fd, err, mode = 0; + + if (input && output) + mode = O_RDWR; + else if (input) + mode = O_RDONLY; + else if (output) + mode = O_WRONLY; + + fd = open(data->dev, mode); + if (fd < 0) + return -errno; + + if (data->raw) { + CATCH_EINTR(err = tcgetattr(fd, &data->tt)); + if (err) + return err; + + err = raw(fd); + if (err) + return err; + } + + *dev_out = data->dev; + return fd; +} + +const struct chan_ops tty_ops = { + .type = "tty", + .init = tty_chan_init, + .open = tty_open, + .close = generic_close, + .read = generic_read, + .write = generic_write, + .console_write = generic_console_write, + .window_size = generic_window_size, + .free = generic_free, + .winch = 0, +}; diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c new file mode 100644 index 0000000..0a86811 
--- /dev/null +++ b/arch/um/drivers/ubd_kern.c @@ -0,0 +1,1461 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +/* 2001-09-28...2002-04-17 + * Partition stuff by James_McMechan@hotmail.com + * old style ubd by setting UBD_SHIFT to 0 + * 2002-09-27...2002-10-18 massive tinkering for 2.5 + * partitions have changed in 2.5 + * 2003-01-29 more tinkering for 2.5.59-1 + * This should now address the sysfs problems and has + * the symlink for devfs to allow for booting with + * the common /dev/ubd/discX/... names rather than + * only /dev/ubdN/discN this version also has lots of + * clean ups preparing for ubd-many. + * James McMechan + */ + +#define MAJOR_NR UBD_MAJOR +#define UBD_SHIFT 4 + +#include "linux/kernel.h" +#include "linux/module.h" +#include "linux/blkdev.h" +#include "linux/hdreg.h" +#include "linux/init.h" +#include "linux/cdrom.h" +#include "linux/proc_fs.h" +#include "linux/ctype.h" +#include "linux/capability.h" +#include "linux/mm.h" +#include "linux/vmalloc.h" +#include "linux/blkpg.h" +#include "linux/genhd.h" +#include "linux/spinlock.h" +#include "linux/platform_device.h" +#include "linux/scatterlist.h" +#include "asm/segment.h" +#include "asm/uaccess.h" +#include "asm/irq.h" +#include "asm/types.h" +#include "asm/tlbflush.h" +#include "mem_user.h" +#include "kern_util.h" +#include "kern.h" +#include "mconsole_kern.h" +#include "init.h" +#include "irq_user.h" +#include "irq_kern.h" +#include "ubd_user.h" +#include "os.h" +#include "mem.h" +#include "mem_kern.h" +#include "cow.h" + +enum ubd_req { UBD_READ, UBD_WRITE }; + +struct io_thread_req { + struct request *req; + enum ubd_req op; + int fds[2]; + unsigned long offsets[2]; + unsigned long long offset; + unsigned long length; + char *buffer; + int sectorsize; + unsigned long sector_mask; + unsigned long long cow_offset; + unsigned long bitmap_words[2]; + int error; +}; + +static inline int ubd_test_bit(__u64 bit, unsigned char *data) +{ + __u64 n; + 
int bits, off; + + bits = sizeof(data[0]) * 8; + n = bit / bits; + off = bit % bits; + return (data[n] & (1 << off)) != 0; +} + +static inline void ubd_set_bit(__u64 bit, unsigned char *data) +{ + __u64 n; + int bits, off; + + bits = sizeof(data[0]) * 8; + n = bit / bits; + off = bit % bits; + data[n] |= (1 << off); +} +/*End stuff from ubd_user.h*/ + +#define DRIVER_NAME "uml-blkdev" + +static DEFINE_MUTEX(ubd_lock); + +static int ubd_open(struct block_device *bdev, fmode_t mode); +static int ubd_release(struct gendisk *disk, fmode_t mode); +static int ubd_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg); +static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo); + +#define MAX_DEV (16) + +static struct block_device_operations ubd_blops = { + .owner = THIS_MODULE, + .open = ubd_open, + .release = ubd_release, + .ioctl = ubd_ioctl, + .getgeo = ubd_getgeo, +}; + +/* Protected by ubd_lock */ +static int fake_major = MAJOR_NR; +static struct gendisk *ubd_gendisk[MAX_DEV]; +static struct gendisk *fake_gendisk[MAX_DEV]; + +#ifdef CONFIG_BLK_DEV_UBD_SYNC +#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \ + .cl = 1 }) +#else +#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \ + .cl = 1 }) +#endif +static struct openflags global_openflags = OPEN_FLAGS; + +struct cow { + /* backing file name */ + char *file; + /* backing file fd */ + int fd; + unsigned long *bitmap; + unsigned long bitmap_len; + int bitmap_offset; + int data_offset; +}; + +#define MAX_SG 64 + +struct ubd { + struct list_head restart; + /* name (and fd, below) of the file opened for writing, either the + * backing or the cow file. 
/*
 * read_proc handler for the fake /proc/ide "media" entry.
 *
 * Always writes the string "disk\n" into page.  The caller asks for count
 * bytes starting at offset off:
 *   - if at least count bytes remain, *start is pointed at page + off and
 *     count is returned;
 *   - otherwise *eof is set; when nothing remains 0 is returned and *start
 *     is left untouched, else *start is pointed at page + off and the number
 *     of remaining bytes is returned.
 * The data argument is unused.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	int remaining;

	strcpy(page, "disk\n");
	remaining = strlen("disk\n");
	remaining -= off;

	if (remaining >= count) {
		*start = page + off;
		return count;
	}

	/* Fewer bytes left than requested: this read reaches the end */
	*eof = 1;
	if (remaining <= 0)
		return 0;

	*start = page + off;
	return remaining;
}
+} + +static int fake_ide_setup(char *str) +{ + fake_ide = 1; + return 1; +} + +__setup("fake_ide", fake_ide_setup); + +__uml_help(fake_ide_setup, +"fake_ide\n" +" Create ide0 entries that map onto ubd devices.\n\n" +); + +static int parse_unit(char **ptr) +{ + char *str = *ptr, *end; + int n = -1; + + if(isdigit(*str)) { + n = simple_strtoul(str, &end, 0); + if(end == str) + return -1; + *ptr = end; + } + else if (('a' <= *str) && (*str <= 'z')) { + n = *str - 'a'; + str++; + *ptr = str; + } + return n; +} + +/* If *index_out == -1 at exit, the passed option was a general one; + * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it + * should not be freed on exit. + */ +static int ubd_setup_common(char *str, int *index_out, char **error_out) +{ + struct ubd *ubd_dev; + struct openflags flags = global_openflags; + char *backing_file; + int n, err = 0, i; + + if(index_out) *index_out = -1; + n = *str; + if(n == '='){ + char *end; + int major; + + str++; + if(!strcmp(str, "sync")){ + global_openflags = of_sync(global_openflags); + goto out1; + } + + err = -EINVAL; + major = simple_strtoul(str, &end, 0); + if((*end != '\0') || (end == str)){ + *error_out = "Didn't parse major number"; + goto out1; + } + + mutex_lock(&ubd_lock); + if(fake_major != MAJOR_NR){ + *error_out = "Can't assign a fake major twice"; + goto out1; + } + + fake_major = major; + + printk(KERN_INFO "Setting extra ubd major number to %d\n", + major); + err = 0; + out1: + mutex_unlock(&ubd_lock); + return err; + } + + n = parse_unit(&str); + if(n < 0){ + *error_out = "Couldn't parse device number"; + return -EINVAL; + } + if(n >= MAX_DEV){ + *error_out = "Device number out of range"; + return 1; + } + + err = -EBUSY; + mutex_lock(&ubd_lock); + + ubd_dev = &ubd_devs[n]; + if(ubd_dev->file != NULL){ + *error_out = "Device is already configured"; + goto out; + } + + if (index_out) + *index_out = n; + + err = -EINVAL; + for (i = 0; i < sizeof("rscd="); i++) { + switch (*str) { + 
case 'r': + flags.w = 0; + break; + case 's': + flags.s = 1; + break; + case 'd': + ubd_dev->no_cow = 1; + break; + case 'c': + ubd_dev->shared = 1; + break; + case '=': + str++; + goto break_loop; + default: + *error_out = "Expected '=' or flag letter " + "(r, s, c, or d)"; + goto out; + } + str++; + } + + if (*str == '=') + *error_out = "Too many flags specified"; + else + *error_out = "Missing '='"; + goto out; + +break_loop: + backing_file = strchr(str, ','); + + if (backing_file == NULL) + backing_file = strchr(str, ':'); + + if(backing_file != NULL){ + if(ubd_dev->no_cow){ + *error_out = "Can't specify both 'd' and a cow file"; + goto out; + } + else { + *backing_file = '\0'; + backing_file++; + } + } + err = 0; + ubd_dev->file = str; + ubd_dev->cow.file = backing_file; + ubd_dev->boot_openflags = flags; +out: + mutex_unlock(&ubd_lock); + return err; +} + +static int ubd_setup(char *str) +{ + char *error; + int err; + + err = ubd_setup_common(str, NULL, &error); + if(err) + printk(KERN_ERR "Failed to initialize device with \"%s\" : " + "%s\n", str, error); + return 1; +} + +__setup("ubd", ubd_setup); +__uml_help(ubd_setup, +"ubd<n><flags>=<filename>[(:|,)<filename2>]\n" +" This is used to associate a device with a file in the underlying\n" +" filesystem. When specifying two filenames, the first one is the\n" +" COW name and the second is the backing file name. As separator you can\n" +" use either a ':' or a ',': the first one allows writing things like;\n" +" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n" +" while with a ',' the shell would not expand the 2nd '~'.\n" +" When using only one filename, UML will detect whether to treat it like\n" +" a COW file or a backing file. To override this detection, add the 'd'\n" +" flag:\n" +" ubd0d=BackingFile\n" +" Usually, there is a filesystem in the file, but \n" +" that's not required. Swap devices containing swap files can be\n" +" specified like this. 
Also, a file which doesn't contain a\n" +" filesystem can have its contents read in the virtual \n" +" machine by running 'dd' on the device. <n> must be in the range\n" +" 0 to 7. Appending an 'r' to the number will cause that device\n" +" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n" +" an 's' will cause data to be written to disk on the host immediately.\n" +" 'c' will cause the device to be treated as being shared between multiple\n" +" UMLs and file locking will be turned off - this is appropriate for a\n" +" cluster filesystem and inappropriate at almost all other times.\n\n" +); + +static int udb_setup(char *str) +{ + printk("udb%s specified on command line is almost certainly a ubd -> " + "udb TYPO\n", str); + return 1; +} + +__setup("udb", udb_setup); +__uml_help(udb_setup, +"udb\n" +" This option is here solely to catch ubd -> udb typos, which can be\n" +" to impossible to catch visually unless you specifically look for\n" +" them. The only result of any option starting with 'udb' is an error\n" +" in the boot output.\n\n" +); + +static void do_ubd_request(struct request_queue * q); + +/* Only changed by ubd_init, which is an initcall. */ +static int thread_fd = -1; + +static void ubd_end_request(struct request *req, int bytes, int error) +{ + blk_end_request(req, error, bytes); +} + +/* Callable only from interrupt context - otherwise you need to do + * spin_lock_irq()/spin_lock_irqsave() */ +static inline void ubd_finish(struct request *req, int bytes) +{ + if(bytes < 0){ + ubd_end_request(req, 0, -EIO); + return; + } + ubd_end_request(req, bytes, 0); +} + +static LIST_HEAD(restart); + +/* XXX - move this inside ubd_intr. */ +/* Called without dev->lock held, and only in interrupt context. 
/*
 * Process what the I/O thread sends back on thread_fd, then restart any
 * devices that are waiting on the restart list.
 *
 * Called without dev->lock held, and only in interrupt context.
 *
 * Each successful read yields one io_thread_req pointer - presumably one
 * that do_ubd_request() handed to the I/O thread earlier and that the thread
 * has finished with (TODO confirm against the I/O thread code).
 *
 * NOTE(review): req->error is not looked at here; a request is always
 * completed through ubd_finish() with its full byte count.
 */
static void ubd_handler(void)
{
	struct io_thread_req *req;
	struct request *rq;
	struct ubd *ubd;
	struct list_head *list, *next_ele;
	unsigned long flags;
	int n;

	while(1){
		n = os_read_file(thread_fd, &req,
				 sizeof(struct io_thread_req *));
		if(n != sizeof(req)){
			/* -EAGAIN: nothing more to read for now */
			if(n == -EAGAIN)
				break;
			/* Any other short read bails out without
			 * reactivating the descriptor */
			printk(KERN_ERR "spurious interrupt in ubd_handler, "
			       "err = %d\n", -n);
			return;
		}

		rq = req->req;
		/* Account for this piece (length is in bytes, 512 per sector);
		 * the request is finished once no sectors are left */
		rq->nr_sectors -= req->length >> 9;
		if(rq->nr_sectors == 0)
			ubd_finish(rq, rq->hard_nr_sectors << 9);
		kfree(req);
	}
	reactivate_fd(thread_fd, UBD_IRQ);

	/* Re-run the request function for every device that do_ubd_request()
	 * put on the restart list, under that device's lock */
	list_for_each_safe(list, next_ele, &restart){
		ubd = container_of(list, struct ubd, restart);
		list_del_init(&ubd->restart);
		spin_lock_irqsave(&ubd->lock, flags);
		do_ubd_request(ubd->queue);
		spin_unlock_irqrestore(&ubd->lock, flags);
	}
}
ubd_dev->cow.file : ubd_dev->file; + return os_file_size(file, size_out); +} + +static int read_cow_bitmap(int fd, void *buf, int offset, int len) +{ + int err; + + err = os_seek_file(fd, offset); + if (err < 0) + return err; + + err = os_read_file(fd, buf, len); + if (err < 0) + return err; + + return 0; +} + +static int backing_file_mismatch(char *file, __u64 size, time_t mtime) +{ + unsigned long modtime; + unsigned long long actual; + int err; + + err = os_file_modtime(file, &modtime); + if (err < 0) { + printk(KERN_ERR "Failed to get modification time of backing " + "file \"%s\", err = %d\n", file, -err); + return err; + } + + err = os_file_size(file, &actual); + if (err < 0) { + printk(KERN_ERR "Failed to get size of backing file \"%s\", " + "err = %d\n", file, -err); + return err; + } + + if (actual != size) { + /*__u64 can be a long on AMD64 and with %lu GCC complains; so + * the typecast.*/ + printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header " + "vs backing file\n", (unsigned long long) size, actual); + return -EINVAL; + } + if (modtime != mtime) { + printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs " + "backing file\n", mtime, modtime); + return -EINVAL; + } + return 0; +} + +static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow) +{ + struct uml_stat buf1, buf2; + int err; + + if (from_cmdline == NULL) + return 0; + if (!strcmp(from_cmdline, from_cow)) + return 0; + + err = os_stat_file(from_cmdline, &buf1); + if (err < 0) { + printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline, + -err); + return 0; + } + err = os_stat_file(from_cow, &buf2); + if (err < 0) { + printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow, + -err); + return 1; + } + if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino)) + return 0; + + printk(KERN_ERR "Backing file mismatch - \"%s\" requested, " + "\"%s\" specified in COW header of \"%s\"\n", + from_cmdline, from_cow, cow); + return 1; +} + +static int 
open_ubd_file(char *file, struct openflags *openflags, int shared, + char **backing_file_out, int *bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out, + int *create_cow_out) +{ + time_t mtime; + unsigned long long size; + __u32 version, align; + char *backing_file; + int fd, err, sectorsize, asked_switch, mode = 0644; + + fd = os_open_file(file, *openflags, mode); + if (fd < 0) { + if ((fd == -ENOENT) && (create_cow_out != NULL)) + *create_cow_out = 1; + if (!openflags->w || + ((fd != -EROFS) && (fd != -EACCES))) + return fd; + openflags->w = 0; + fd = os_open_file(file, *openflags, mode); + if (fd < 0) + return fd; + } + + if (shared) + printk(KERN_INFO "Not locking \"%s\" on the host\n", file); + else { + err = os_lock_file(fd, openflags->w); + if (err < 0) { + printk(KERN_ERR "Failed to lock '%s', err = %d\n", + file, -err); + goto out_close; + } + } + + /* Successful return case! */ + if (backing_file_out == NULL) + return fd; + + err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime, + &size, §orsize, &align, bitmap_offset_out); + if (err && (*backing_file_out != NULL)) { + printk(KERN_ERR "Failed to read COW header from COW file " + "\"%s\", errno = %d\n", file, -err); + goto out_close; + } + if (err) + return fd; + + asked_switch = path_requires_switch(*backing_file_out, backing_file, + file); + + /* Allow switching only if no mismatch. 
*/ + if (asked_switch && !backing_file_mismatch(*backing_file_out, size, + mtime)) { + printk(KERN_ERR "Switching backing file to '%s'\n", + *backing_file_out); + err = write_cow_header(file, fd, *backing_file_out, + sectorsize, align, &size); + if (err) { + printk(KERN_ERR "Switch failed, errno = %d\n", -err); + goto out_close; + } + } else { + *backing_file_out = backing_file; + err = backing_file_mismatch(*backing_file_out, size, mtime); + if (err) + goto out_close; + } + + cow_sizes(version, size, sectorsize, align, *bitmap_offset_out, + bitmap_len_out, data_offset_out); + + return fd; + out_close: + os_close_file(fd); + return err; +} + +static int create_cow_file(char *cow_file, char *backing_file, + struct openflags flags, + int sectorsize, int alignment, int *bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out) +{ + int err, fd; + + flags.c = 1; + fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL); + if (fd < 0) { + err = fd; + printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n", + cow_file, -err); + goto out; + } + + err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment, + bitmap_offset_out, bitmap_len_out, + data_offset_out); + if (!err) + return fd; + os_close_file(fd); + out: + return err; +} + +static void ubd_close_dev(struct ubd *ubd_dev) +{ + os_close_file(ubd_dev->fd); + if(ubd_dev->cow.file == NULL) + return; + + os_close_file(ubd_dev->cow.fd); + vfree(ubd_dev->cow.bitmap); + ubd_dev->cow.bitmap = NULL; +} + +static int ubd_open_dev(struct ubd *ubd_dev) +{ + struct openflags flags; + char **back_ptr; + int err, create_cow, *create_ptr; + int fd; + + ubd_dev->openflags = ubd_dev->boot_openflags; + create_cow = 0; + create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL; + back_ptr = ubd_dev->no_cow ? 
NULL : &ubd_dev->cow.file; + + fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared, + back_ptr, &ubd_dev->cow.bitmap_offset, + &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset, + create_ptr); + + if((fd == -ENOENT) && create_cow){ + fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file, + ubd_dev->openflags, 1 << 9, PAGE_SIZE, + &ubd_dev->cow.bitmap_offset, + &ubd_dev->cow.bitmap_len, + &ubd_dev->cow.data_offset); + if(fd >= 0){ + printk(KERN_INFO "Creating \"%s\" as COW file for " + "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file); + } + } + + if(fd < 0){ + printk("Failed to open '%s', errno = %d\n", ubd_dev->file, + -fd); + return fd; + } + ubd_dev->fd = fd; + + if(ubd_dev->cow.file != NULL){ + blk_queue_max_sectors(ubd_dev->queue, 8 * sizeof(long)); + + err = -ENOMEM; + ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len); + if(ubd_dev->cow.bitmap == NULL){ + printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); + goto error; + } + flush_tlb_kernel_vm(); + + err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap, + ubd_dev->cow.bitmap_offset, + ubd_dev->cow.bitmap_len); + if(err < 0) + goto error; + + flags = ubd_dev->openflags; + flags.w = 0; + err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL, + NULL, NULL, NULL, NULL); + if(err < 0) goto error; + ubd_dev->cow.fd = err; + } + return 0; + error: + os_close_file(ubd_dev->fd); + return err; +} + +static void ubd_device_release(struct device *dev) +{ + struct ubd *ubd_dev = dev->driver_data; + + blk_cleanup_queue(ubd_dev->queue); + *ubd_dev = ((struct ubd) DEFAULT_UBD); +} + +static int ubd_disk_register(int major, u64 size, int unit, + struct gendisk **disk_out) +{ + struct gendisk *disk; + + disk = alloc_disk(1 << UBD_SHIFT); + if(disk == NULL) + return -ENOMEM; + + disk->major = major; + disk->first_minor = unit << UBD_SHIFT; + disk->fops = &ubd_blops; + set_capacity(disk, size / 512); + if(major == MAJOR_NR) + sprintf(disk->disk_name, "ubd%c", 'a' + unit); + else + 
/*
 * Round n up to the next multiple of the 512-byte (1 << 9) block size.
 *
 * The argument is parenthesized so that expressions with low-precedence
 * operators expand correctly, and the mask is built with ~ rather than by
 * left-shifting -1, which is undefined for a negative operand.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
fails, or if only general options + * were set. + */ + str = kstrdup(str, GFP_KERNEL); + if (str == NULL) { + *error_out = "Failed to allocate memory"; + return -ENOMEM; + } + + ret = ubd_setup_common(str, &n, error_out); + if (ret) + goto err_free; + + if (n == -1) { + ret = 0; + goto err_free; + } + + mutex_lock(&ubd_lock); + ret = ubd_add(n, error_out); + if (ret) + ubd_devs[n].file = NULL; + mutex_unlock(&ubd_lock); + +out: + return ret; + +err_free: + kfree(str); + goto out; +} + +static int ubd_get_config(char *name, char *str, int size, char **error_out) +{ + struct ubd *ubd_dev; + int n, len = 0; + + n = parse_unit(&name); + if((n >= MAX_DEV) || (n < 0)){ + *error_out = "ubd_get_config : device number out of range"; + return -1; + } + + ubd_dev = &ubd_devs[n]; + mutex_lock(&ubd_lock); + + if(ubd_dev->file == NULL){ + CONFIG_CHUNK(str, size, len, "", 1); + goto out; + } + + CONFIG_CHUNK(str, size, len, ubd_dev->file, 0); + + if(ubd_dev->cow.file != NULL){ + CONFIG_CHUNK(str, size, len, ",", 0); + CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1); + } + else CONFIG_CHUNK(str, size, len, "", 1); + + out: + mutex_unlock(&ubd_lock); + return len; +} + +static int ubd_id(char **str, int *start_out, int *end_out) +{ + int n; + + n = parse_unit(str); + *start_out = 0; + *end_out = MAX_DEV - 1; + return n; +} + +static int ubd_remove(int n, char **error_out) +{ + struct gendisk *disk = ubd_gendisk[n]; + struct ubd *ubd_dev; + int err = -ENODEV; + + mutex_lock(&ubd_lock); + + ubd_dev = &ubd_devs[n]; + + if(ubd_dev->file == NULL) + goto out; + + /* you cannot remove a open disk */ + err = -EBUSY; + if(ubd_dev->count > 0) + goto out; + + ubd_gendisk[n] = NULL; + if(disk != NULL){ + del_gendisk(disk); + put_disk(disk); + } + + if(fake_gendisk[n] != NULL){ + del_gendisk(fake_gendisk[n]); + put_disk(fake_gendisk[n]); + fake_gendisk[n] = NULL; + } + + err = 0; + platform_device_unregister(&ubd_dev->pdev); +out: + mutex_unlock(&ubd_lock); + return err; +} + +/* All these 
are called by mconsole in process context and without + * ubd-specific locks. The structure itself is const except for .list. + */ +static struct mc_device ubd_mc = { + .list = LIST_HEAD_INIT(ubd_mc.list), + .name = "ubd", + .config = ubd_config, + .get_config = ubd_get_config, + .id = ubd_id, + .remove = ubd_remove, +}; + +static int __init ubd_mc_init(void) +{ + mconsole_register_dev(&ubd_mc); + return 0; +} + +__initcall(ubd_mc_init); + +static int __init ubd0_init(void) +{ + struct ubd *ubd_dev = &ubd_devs[0]; + + mutex_lock(&ubd_lock); + if(ubd_dev->file == NULL) + ubd_dev->file = "root_fs"; + mutex_unlock(&ubd_lock); + + return 0; +} + +__initcall(ubd0_init); + +/* Used in ubd_init, which is an initcall */ +static struct platform_driver ubd_driver = { + .driver = { + .name = DRIVER_NAME, + }, +}; + +static int __init ubd_init(void) +{ + char *error; + int i, err; + + if (register_blkdev(MAJOR_NR, "ubd")) + return -1; + + if (fake_major != MAJOR_NR) { + char name[sizeof("ubd_nnn\0")]; + + snprintf(name, sizeof(name), "ubd_%d", fake_major); + if (register_blkdev(fake_major, "ubd")) + return -1; + } + platform_driver_register(&ubd_driver); + mutex_lock(&ubd_lock); + for (i = 0; i < MAX_DEV; i++){ + err = ubd_add(i, &error); + if(err) + printk(KERN_ERR "Failed to initialize ubd device %d :" + "%s\n", i, error); + } + mutex_unlock(&ubd_lock); + return 0; +} + +late_initcall(ubd_init); + +static int __init ubd_driver_init(void){ + unsigned long stack; + int err; + + /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ + if(global_openflags.s){ + printk(KERN_INFO "ubd: Synchronous mode\n"); + /* Letting ubd=sync be like using ubd#s= instead of ubd#= is + * enough. So use anyway the io thread. 
*/ + } + stack = alloc_stack(0, 0); + io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), + &thread_fd); + if(io_pid < 0){ + printk(KERN_ERR + "ubd : Failed to start I/O thread (errno = %d) - " + "falling back to synchronous I/O\n", -io_pid); + io_pid = -1; + return 0; + } + err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, + IRQF_DISABLED, "ubd", ubd_devs); + if(err != 0) + printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); + return 0; +} + +device_initcall(ubd_driver_init); + +static int ubd_open(struct block_device *bdev, fmode_t mode) +{ + struct gendisk *disk = bdev->bd_disk; + struct ubd *ubd_dev = disk->private_data; + int err = 0; + + if(ubd_dev->count == 0){ + err = ubd_open_dev(ubd_dev); + if(err){ + printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n", + disk->disk_name, ubd_dev->file, -err); + goto out; + } + } + ubd_dev->count++; + set_disk_ro(disk, !ubd_dev->openflags.w); + + /* This should no more be needed. And it didn't work anyway to exclude + * read-write remounting of filesystems.*/ + /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){ + if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev); + err = -EROFS; + }*/ + out: + return err; +} + +static int ubd_release(struct gendisk *disk, fmode_t mode) +{ + struct ubd *ubd_dev = disk->private_data; + + if(--ubd_dev->count == 0) + ubd_close_dev(ubd_dev); + return 0; +} + +static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, + __u64 *cow_offset, unsigned long *bitmap, + __u64 bitmap_offset, unsigned long *bitmap_words, + __u64 bitmap_len) +{ + __u64 sector = io_offset >> 9; + int i, update_bitmap = 0; + + for(i = 0; i < length >> 9; i++){ + if(cow_mask != NULL) + ubd_set_bit(i, (unsigned char *) cow_mask); + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) + continue; + + update_bitmap = 1; + ubd_set_bit(sector + i, (unsigned char *) bitmap); + } + + if(!update_bitmap) + return; + + *cow_offset = sector / (sizeof(unsigned long) * 8); + + /* 
This takes care of the case where we're exactly at the end of the + * device, and *cow_offset + 1 is off the end. So, just back it up + * by one word. Thanks to Lynn Kerby for the fix and James McMechan + * for the original diagnosis. + */ + if (*cow_offset == (DIV_ROUND_UP(bitmap_len, + sizeof(unsigned long)) - 1)) + (*cow_offset)--; + + bitmap_words[0] = bitmap[*cow_offset]; + bitmap_words[1] = bitmap[*cow_offset + 1]; + + *cow_offset *= sizeof(unsigned long); + *cow_offset += bitmap_offset; +} + +static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, + __u64 bitmap_offset, __u64 bitmap_len) +{ + __u64 sector = req->offset >> 9; + int i; + + if(req->length > (sizeof(req->sector_mask) * 8) << 9) + panic("Operation too long"); + + if(req->op == UBD_READ) { + for(i = 0; i < req->length >> 9; i++){ + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) + ubd_set_bit(i, (unsigned char *) + &req->sector_mask); + } + } + else cowify_bitmap(req->offset, req->length, &req->sector_mask, + &req->cow_offset, bitmap, bitmap_offset, + req->bitmap_words, bitmap_len); +} + +/* Called with dev->lock held */ +static void prepare_request(struct request *req, struct io_thread_req *io_req, + unsigned long long offset, int page_offset, + int len, struct page *page) +{ + struct gendisk *disk = req->rq_disk; + struct ubd *ubd_dev = disk->private_data; + + io_req->req = req; + io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd : + ubd_dev->fd; + io_req->fds[1] = ubd_dev->fd; + io_req->cow_offset = -1; + io_req->offset = offset; + io_req->length = len; + io_req->error = 0; + io_req->sector_mask = 0; + + io_req->op = (rq_data_dir(req) == READ) ? 
UBD_READ : UBD_WRITE; + io_req->offsets[0] = 0; + io_req->offsets[1] = ubd_dev->cow.data_offset; + io_req->buffer = page_address(page) + page_offset; + io_req->sectorsize = 1 << 9; + + if(ubd_dev->cow.file != NULL) + cowify_req(io_req, ubd_dev->cow.bitmap, + ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len); + +} + +/* Called with dev->lock held */ +static void do_ubd_request(struct request_queue *q) +{ + struct io_thread_req *io_req; + struct request *req; + int n, last_sectors; + + while(1){ + struct ubd *dev = q->queuedata; + if(dev->end_sg == 0){ + struct request *req = elv_next_request(q); + if(req == NULL) + return; + + dev->request = req; + blkdev_dequeue_request(req); + dev->start_sg = 0; + dev->end_sg = blk_rq_map_sg(q, req, dev->sg); + } + + req = dev->request; + last_sectors = 0; + while(dev->start_sg < dev->end_sg){ + struct scatterlist *sg = &dev->sg[dev->start_sg]; + + req->sector += last_sectors; + io_req = kmalloc(sizeof(struct io_thread_req), + GFP_ATOMIC); + if(io_req == NULL){ + if(list_empty(&dev->restart)) + list_add(&dev->restart, &restart); + return; + } + prepare_request(req, io_req, + (unsigned long long) req->sector << 9, + sg->offset, sg->length, sg_page(sg)); + + last_sectors = sg->length >> 9; + n = os_write_file(thread_fd, &io_req, + sizeof(struct io_thread_req *)); + if(n != sizeof(struct io_thread_req *)){ + if(n != -EAGAIN) + printk("write to io thread failed, " + "errno = %d\n", -n); + else if(list_empty(&dev->restart)) + list_add(&dev->restart, &restart); + kfree(io_req); + return; + } + + dev->start_sg++; + } + dev->end_sg = 0; + dev->request = NULL; + } +} + +static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct ubd *ubd_dev = bdev->bd_disk->private_data; + + geo->heads = 128; + geo->sectors = 32; + geo->cylinders = ubd_dev->size / (128 * 32 * 512); + return 0; +} + +static int ubd_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct ubd *ubd_dev = 
bdev->bd_disk->private_data; + struct hd_driveid ubd_id = { + .cyls = 0, + .heads = 128, + .sectors = 32, + }; + + switch (cmd) { + struct cdrom_volctrl volume; + case HDIO_GET_IDENTITY: + ubd_id.cyls = ubd_dev->size / (128 * 32 * 512); + if(copy_to_user((char __user *) arg, (char *) &ubd_id, + sizeof(ubd_id))) + return -EFAULT; + return 0; + + case CDROMVOLREAD: + if(copy_from_user(&volume, (char __user *) arg, sizeof(volume))) + return -EFAULT; + volume.channel0 = 255; + volume.channel1 = 255; + volume.channel2 = 255; + volume.channel3 = 255; + if(copy_to_user((char __user *) arg, &volume, sizeof(volume))) + return -EFAULT; + return 0; + } + return -EINVAL; +} + +static int update_bitmap(struct io_thread_req *req) +{ + int n; + + if(req->cow_offset == -1) + return 0; + + n = os_seek_file(req->fds[1], req->cow_offset); + if(n < 0){ + printk("do_io - bitmap lseek failed : err = %d\n", -n); + return 1; + } + + n = os_write_file(req->fds[1], &req->bitmap_words, + sizeof(req->bitmap_words)); + if(n != sizeof(req->bitmap_words)){ + printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, + req->fds[1]); + return 1; + } + + return 0; +} + +static void do_io(struct io_thread_req *req) +{ + char *buf; + unsigned long len; + int n, nsectors, start, end, bit; + int err; + __u64 off; + + nsectors = req->length / req->sectorsize; + start = 0; + do { + bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask); + end = start; + while((end < nsectors) && + (ubd_test_bit(end, (unsigned char *) + &req->sector_mask) == bit)) + end++; + + off = req->offset + req->offsets[bit] + + start * req->sectorsize; + len = (end - start) * req->sectorsize; + buf = &req->buffer[start * req->sectorsize]; + + err = os_seek_file(req->fds[bit], off); + if(err < 0){ + printk("do_io - lseek failed : err = %d\n", -err); + req->error = 1; + return; + } + if(req->op == UBD_READ){ + n = 0; + do { + buf = &buf[n]; + len -= n; + n = os_read_file(req->fds[bit], buf, len); + if (n < 0) { + 
printk("do_io - read failed, err = %d " + "fd = %d\n", -n, req->fds[bit]); + req->error = 1; + return; + } + } while((n < len) && (n != 0)); + if (n < len) memset(&buf[n], 0, len - n); + } else { + n = os_write_file(req->fds[bit], buf, len); + if(n != len){ + printk("do_io - write failed err = %d " + "fd = %d\n", -n, req->fds[bit]); + req->error = 1; + return; + } + } + + start = end; + } while(start < nsectors); + + req->error = update_bitmap(req); +} + +/* Changed in start_io_thread, which is serialized by being called only + * from ubd_init, which is an initcall. + */ +int kernel_fd = -1; + +/* Only changed by the io thread. XXX: currently unused. */ +static int io_count = 0; + +int io_thread(void *arg) +{ + struct io_thread_req *req; + int n; + + ignore_sigwinch_sig(); + while(1){ + n = os_read_file(kernel_fd, &req, + sizeof(struct io_thread_req *)); + if(n != sizeof(struct io_thread_req *)){ + if(n < 0) + printk("io_thread - read failed, fd = %d, " + "err = %d\n", kernel_fd, -n); + else { + printk("io_thread - short read, fd = %d, " + "length = %d\n", kernel_fd, n); + } + continue; + } + io_count++; + do_io(req); + n = os_write_file(kernel_fd, &req, + sizeof(struct io_thread_req *)); + if(n != sizeof(struct io_thread_req *)) + printk("io_thread - write failed, fd = %d, err = %d\n", + kernel_fd, -n); + } + + return 0; +} diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c new file mode 100644 index 0000000..b591bb9 --- /dev/null +++ b/arch/um/drivers/ubd_user.c @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 Ridgerun,Inc (glonnon@ridgerun.com) + * Licensed under the GPL + */ + +#include <stddef.h> +#include <unistd.h> +#include <errno.h> +#include <sched.h> +#include <signal.h> +#include <string.h> +#include <netinet/in.h> +#include <sys/time.h> +#include <sys/socket.h> +#include <sys/mman.h> +#include <sys/param.h> +#include "asm/types.h" +#include "user.h" +#include "ubd_user.h" 
+#include "os.h" +#include "cow.h" + +#include <endian.h> +#include <byteswap.h> + +void ignore_sigwinch_sig(void) +{ + signal(SIGWINCH, SIG_IGN); +} + +int start_io_thread(unsigned long sp, int *fd_out) +{ + int pid, fds[2], err; + + err = os_pipe(fds, 1, 1); + if(err < 0){ + printk("start_io_thread - os_pipe failed, err = %d\n", -err); + goto out; + } + + kernel_fd = fds[0]; + *fd_out = fds[1]; + + err = os_set_fd_block(*fd_out, 0); + if (err) { + printk("start_io_thread - failed to set nonblocking I/O.\n"); + goto out_close; + } + + pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM, NULL); + if(pid < 0){ + err = -errno; + printk("start_io_thread - clone failed : errno = %d\n", errno); + goto out_close; + } + + return(pid); + + out_close: + os_close_file(fds[0]); + os_close_file(fds[1]); + kernel_fd = -1; + *fd_out = -1; + out: + return err; +} diff --git a/arch/um/drivers/vde.h b/arch/um/drivers/vde.h new file mode 100644 index 0000000..fc3a059 --- /dev/null +++ b/arch/um/drivers/vde.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org). + * Licensed under the GPL. + */ + +#ifndef __UM_VDE_H__ +#define __UM_VDE_H__ + +struct vde_data { + char *vde_switch; + char *descr; + void *args; + void *conn; + void *dev; +}; + +struct vde_init { + char *vde_switch; + char *descr; + int port; + char *group; + int mode; +}; + +extern const struct net_user_info vde_user_info; + +extern void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init); + +extern int vde_user_read(void *conn, void *buf, int len); +extern int vde_user_write(void *conn, void *buf, int len); + +#endif diff --git a/arch/um/drivers/vde_kern.c b/arch/um/drivers/vde_kern.c new file mode 100644 index 0000000..add7e72 --- /dev/null +++ b/arch/um/drivers/vde_kern.c @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org). + * Licensed under the GPL. 
+ * + * Transport usage: + * ethN=vde,<vde_switch>,<mac addr>,<port>,<group>,<mode>,<description> + * + */ + +#include "linux/init.h" +#include <linux/netdevice.h> +#include "net_kern.h" +#include "net_user.h" +#include "vde.h" + +static void vde_init(struct net_device *dev, void *data) +{ + struct vde_init *init = data; + struct uml_net_private *pri; + struct vde_data *vpri; + + pri = dev->priv; + vpri = (struct vde_data *) pri->user; + + vpri->vde_switch = init->vde_switch; + vpri->descr = init->descr ? init->descr : "UML vde_transport"; + vpri->args = NULL; + vpri->conn = NULL; + vpri->dev = dev; + + printk("vde backend - %s, ", vpri->vde_switch ? + vpri->vde_switch : "(default socket)"); + + vde_init_libstuff(vpri, init); + + printk("\n"); +} + +static int vde_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + struct vde_data *pri = (struct vde_data *) &lp->user; + + if (pri->conn != NULL) + return vde_user_read(pri->conn, skb_mac_header(skb), + skb->dev->mtu + ETH_HEADER_OTHER); + + printk(KERN_ERR "vde_read - we have no VDECONN to read from"); + return -EBADF; +} + +static int vde_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + struct vde_data *pri = (struct vde_data *) &lp->user; + + if (pri->conn != NULL) + return vde_user_write((void *)pri->conn, skb->data, + skb->len); + + printk(KERN_ERR "vde_write - we have no VDECONN to write to"); + return -EBADF; +} + +static const struct net_kern_info vde_kern_info = { + .init = vde_init, + .protocol = eth_protocol, + .read = vde_read, + .write = vde_write, +}; + +static int vde_setup(char *str, char **mac_out, void *data) +{ + struct vde_init *init = data; + char *remain, *port_str = NULL, *mode_str = NULL, *last; + + *init = ((struct vde_init) + { .vde_switch = NULL, + .descr = NULL, + .port = 0, + .group = NULL, + .mode = 0 }); + + remain = split_if_spec(str, &init->vde_switch, mac_out, &port_str, + &init->group, &mode_str, &init->descr, NULL); + + if (remain != NULL) + 
printk(KERN_WARNING "vde_setup - Ignoring extra data :" + "'%s'\n", remain); + + if (port_str != NULL) { + init->port = simple_strtoul(port_str, &last, 10); + if ((*last != '\0') || (last == port_str)) { + printk(KERN_ERR "vde_setup - Bad port : '%s'\n", + port_str); + return 0; + } + } + + if (mode_str != NULL) { + init->mode = simple_strtoul(mode_str, &last, 8); + if ((*last != '\0') || (last == mode_str)) { + printk(KERN_ERR "vde_setup - Bad mode : '%s'\n", + mode_str); + return 0; + } + } + + printk(KERN_INFO "Configured vde device: %s\n", init->vde_switch ? + init->vde_switch : "(default socket)"); + + return 1; +} + +static struct transport vde_transport = { + .list = LIST_HEAD_INIT(vde_transport.list), + .name = "vde", + .setup = vde_setup, + .user = &vde_user_info, + .kern = &vde_kern_info, + .private_size = sizeof(struct vde_data), + .setup_size = sizeof(struct vde_init), +}; + +static int register_vde(void) +{ + register_transport(&vde_transport); + return 0; +} + +late_initcall(register_vde); diff --git a/arch/um/drivers/vde_user.c b/arch/um/drivers/vde_user.c new file mode 100644 index 0000000..56533db --- /dev/null +++ b/arch/um/drivers/vde_user.c @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org). + * Licensed under the GPL. 
+ */ + +#include <stddef.h> +#include <errno.h> +#include <libvdeplug.h> +#include "kern_constants.h" +#include "net_user.h" +#include "um_malloc.h" +#include "user.h" +#include "vde.h" + +static int vde_user_init(void *data, void *dev) +{ + struct vde_data *pri = data; + VDECONN *conn = NULL; + int err = -EINVAL; + + pri->dev = dev; + + conn = vde_open(pri->vde_switch, pri->descr, pri->args); + + if (conn == NULL) { + err = -errno; + printk(UM_KERN_ERR "vde_user_init: vde_open failed, " + "errno = %d\n", errno); + return err; + } + + printk(UM_KERN_INFO "vde backend - connection opened\n"); + + pri->conn = conn; + + return 0; +} + +static int vde_user_open(void *data) +{ + struct vde_data *pri = data; + + if (pri->conn != NULL) + return vde_datafd(pri->conn); + + printk(UM_KERN_WARNING "vde_open - we have no VDECONN to open"); + return -EINVAL; +} + +static void vde_remove(void *data) +{ + struct vde_data *pri = data; + + if (pri->conn != NULL) { + printk(UM_KERN_INFO "vde backend - closing connection\n"); + vde_close(pri->conn); + pri->conn = NULL; + kfree(pri->args); + pri->args = NULL; + return; + } + + printk(UM_KERN_WARNING "vde_remove - we have no VDECONN to remove"); +} + +const struct net_user_info vde_user_info = { + .init = vde_user_init, + .open = vde_user_open, + .close = NULL, + .remove = vde_remove, + .add_address = NULL, + .delete_address = NULL, + .mtu = ETH_MAX_PACKET, + .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, +}; + +void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init) +{ + struct vde_open_args *args; + + vpri->args = kmalloc(sizeof(struct vde_open_args), UM_GFP_KERNEL); + if (vpri->args == NULL) { + printk(UM_KERN_ERR "vde_init_libstuff - vde_open_args " + "allocation failed"); + return; + } + + args = vpri->args; + + args->port = init->port; + args->group = init->group; + args->mode = init->mode ? init->mode : 0700; + + args->port ? 
printk(UM_KERN_INFO "port %d", args->port) : + printk(UM_KERN_INFO "undefined port"); +} + +int vde_user_read(void *conn, void *buf, int len) +{ + VDECONN *vconn = conn; + int rv; + + if (vconn == NULL) + return 0; + + rv = vde_recv(vconn, buf, len, 0); + if (rv < 0) { + if (errno == EAGAIN) + return 0; + return -errno; + } + else if (rv == 0) + return -ENOTCONN; + + return rv; +} + +int vde_user_write(void *conn, void *buf, int len) +{ + VDECONN *vconn = conn; + + if (vconn == NULL) + return 0; + + return vde_send(vconn, buf, len, 0); +} + diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c new file mode 100644 index 0000000..da2caa5 --- /dev/null +++ b/arch/um/drivers/xterm.c @@ -0,0 +1,224 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <termios.h> +#include "chan_user.h" +#include "kern_constants.h" +#include "os.h" +#include "um_malloc.h" +#include "user.h" +#include "xterm.h" + +struct xterm_chan { + int pid; + int helper_pid; + char *title; + int device; + int raw; + struct termios tt; +}; + +static void *xterm_init(char *str, int device, const struct chan_opts *opts) +{ + struct xterm_chan *data; + + data = uml_kmalloc(sizeof(*data), UM_GFP_KERNEL); + if (data == NULL) + return NULL; + *data = ((struct xterm_chan) { .pid = -1, + .helper_pid = -1, + .device = device, + .title = opts->xterm_title, + .raw = opts->raw } ); + return data; +} + +/* Only changed by xterm_setup, which is a setup */ +static char *terminal_emulator = "xterm"; +static char *title_switch = "-T"; +static char *exec_switch = "-e"; + +static int __init xterm_setup(char *line, int *add) +{ + *add = 0; + terminal_emulator = line; + + line = strchr(line, ','); + if (line == NULL) + return 0; + + *line++ = '\0'; + if (*line) + title_switch = line; + + line = strchr(line, ','); + if 
(line == NULL) + return 0; + + *line++ = '\0'; + if (*line) + exec_switch = line; + + return 0; +} + +__uml_setup("xterm=", xterm_setup, +"xterm=<terminal emulator>,<title switch>,<exec switch>\n" +" Specifies an alternate terminal emulator to use for the debugger,\n" +" consoles, and serial lines when they are attached to the xterm channel.\n" +" The values are the terminal emulator binary, the switch it uses to set\n" +" its title, and the switch it uses to execute a subprocess,\n" +" respectively. The title switch must have the form '<switch> title',\n" +" not '<switch>=title'. Similarly, the exec switch must have the form\n" +" '<switch> command arg1 arg2 ...'.\n" +" The default values are 'xterm=xterm,-T,-e'. Values for gnome-terminal\n" +" are 'xterm=gnome-terminal,-t,-x'.\n\n" +); + +static int xterm_open(int input, int output, int primary, void *d, + char **dev_out) +{ + struct xterm_chan *data = d; + int pid, fd, new, err; + char title[256], file[] = "/tmp/xterm-pipeXXXXXX"; + char *argv[] = { terminal_emulator, title_switch, title, exec_switch, + "/usr/lib/uml/port-helper", "-uml-socket", + file, NULL }; + + if (access(argv[4], X_OK) < 0) + argv[4] = "port-helper"; + + /* + * Check that DISPLAY is set, this doesn't guarantee the xterm + * will work but w/o it we can be pretty sure it won't. + */ + if (getenv("DISPLAY") == NULL) { + printk(UM_KERN_ERR "xterm_open: $DISPLAY not set.\n"); + return -ENODEV; + } + + /* + * This business of getting a descriptor to a temp file, + * deleting the file and closing the descriptor is just to get + * a known-unused name for the Unix socket that we really + * want. 
+ */ + fd = mkstemp(file); + if (fd < 0) { + err = -errno; + printk(UM_KERN_ERR "xterm_open : mkstemp failed, errno = %d\n", + errno); + return err; + } + + if (unlink(file)) { + err = -errno; + printk(UM_KERN_ERR "xterm_open : unlink failed, errno = %d\n", + errno); + return err; + } + close(fd); + + fd = os_create_unix_socket(file, sizeof(file), 1); + if (fd < 0) { + printk(UM_KERN_ERR "xterm_open : create_unix_socket failed, " + "errno = %d\n", -fd); + return fd; + } + + sprintf(title, data->title, data->device); + pid = run_helper(NULL, NULL, argv); + if (pid < 0) { + err = pid; + printk(UM_KERN_ERR "xterm_open : run_helper failed, " + "errno = %d\n", -err); + goto out_close1; + } + + err = os_set_fd_block(fd, 0); + if (err < 0) { + printk(UM_KERN_ERR "xterm_open : failed to set descriptor " + "non-blocking, err = %d\n", -err); + goto out_kill; + } + + new = xterm_fd(fd, &data->helper_pid); + if (new < 0) { + err = new; + printk(UM_KERN_ERR "xterm_open : os_rcv_fd failed, err = %d\n", + -err); + goto out_kill; + } + + err = os_set_fd_block(new, 0); + if (err) { + printk(UM_KERN_ERR "xterm_open : failed to set xterm " + "descriptor non-blocking, err = %d\n", -err); + goto out_close2; + } + + CATCH_EINTR(err = tcgetattr(new, &data->tt)); + if (err) { + new = err; + goto out_close2; + } + + if (data->raw) { + err = raw(new); + if (err) { + new = err; + goto out_close2; + } + } + + unlink(file); + data->pid = pid; + *dev_out = NULL; + + return new; + + out_close2: + close(new); + out_kill: + os_kill_process(pid, 1); + out_close1: + close(fd); + + return err; +} + +static void xterm_close(int fd, void *d) +{ + struct xterm_chan *data = d; + + if (data->pid != -1) + os_kill_process(data->pid, 1); + data->pid = -1; + + if (data->helper_pid != -1) + os_kill_process(data->helper_pid, 0); + data->helper_pid = -1; + + os_close_file(fd); +} + +const struct chan_ops xterm_ops = { + .type = "xterm", + .init = xterm_init, + .open = xterm_open, + .close = xterm_close, + .read 
= generic_read, + .write = generic_write, + .console_write = generic_console_write, + .window_size = generic_window_size, + .free = generic_free, + .winch = 1, +}; diff --git a/arch/um/drivers/xterm.h b/arch/um/drivers/xterm.h new file mode 100644 index 0000000..f33a6e7 --- /dev/null +++ b/arch/um/drivers/xterm.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __XTERM_H__ +#define __XTERM_H__ + +extern int xterm_fd(int socket, int *pid_out); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c new file mode 100644 index 0000000..b646bcc --- /dev/null +++ b/arch/um/drivers/xterm_kern.c @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/slab.h> +#include <linux/completion.h> +#include <linux/irqreturn.h> +#include <asm/irq.h> +#include "irq_kern.h" +#include "os.h" + +struct xterm_wait { + struct completion ready; + int fd; + int pid; + int new_fd; +}; + +static irqreturn_t xterm_interrupt(int irq, void *data) +{ + struct xterm_wait *xterm = data; + int fd; + + fd = os_rcv_fd(xterm->fd, &xterm->pid); + if (fd == -EAGAIN) + return IRQ_NONE; + + xterm->new_fd = fd; + complete(&xterm->ready); + + return IRQ_HANDLED; +} + +int xterm_fd(int socket, int *pid_out) +{ + struct xterm_wait *data; + int err, ret; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) { + printk(KERN_ERR "xterm_fd : failed to allocate xterm_wait\n"); + return -ENOMEM; + } + + /* This is a locked semaphore... 
*/ + *data = ((struct xterm_wait) { .fd = socket, + .pid = -1, + .new_fd = -1 }); + init_completion(&data->ready); + + err = um_request_irq(XTERM_IRQ, socket, IRQ_READ, xterm_interrupt, + IRQF_DISABLED | IRQF_SHARED | IRQF_SAMPLE_RANDOM, + "xterm", data); + if (err) { + printk(KERN_ERR "xterm_fd : failed to get IRQ for xterm, " + "err = %d\n", err); + ret = err; + goto out; + } + + /* ... so here we wait for an xterm interrupt. + * + * XXX Note, if the xterm doesn't work for some reason (eg. DISPLAY + * isn't set) this will hang... */ + wait_for_completion(&data->ready); + + free_irq(XTERM_IRQ, data); + + ret = data->new_fd; + *pid_out = data->pid; + out: + kfree(data); + + return ret; +} diff --git a/arch/um/include/asm/a.out-core.h b/arch/um/include/asm/a.out-core.h new file mode 100644 index 0000000..995643b --- /dev/null +++ b/arch/um/include/asm/a.out-core.h @@ -0,0 +1,27 @@ +/* a.out coredump register dumper + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. 
+ */ + +#ifndef __UM_A_OUT_CORE_H +#define __UM_A_OUT_CORE_H + +#ifdef __KERNEL__ + +#include <linux/user.h> + +/* + * fill in the user structure for an a.out core dump + */ +static inline void aout_dump_thread(struct pt_regs *regs, struct user *u) +{ +} + +#endif /* __KERNEL__ */ +#endif /* __UM_A_OUT_CORE_H */ diff --git a/arch/um/include/asm/apic.h b/arch/um/include/asm/apic.h new file mode 100644 index 0000000..876dee8 --- /dev/null +++ b/arch/um/include/asm/apic.h @@ -0,0 +1,4 @@ +#ifndef __UM_APIC_H +#define __UM_APIC_H + +#endif diff --git a/arch/um/include/asm/auxvec.h b/arch/um/include/asm/auxvec.h new file mode 100644 index 0000000..1e5e1c2 --- /dev/null +++ b/arch/um/include/asm/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __UM_AUXVEC_H +#define __UM_AUXVEC_H + +#endif diff --git a/arch/um/include/asm/bugs.h b/arch/um/include/asm/bugs.h new file mode 100644 index 0000000..6a72e24 --- /dev/null +++ b/arch/um/include/asm/bugs.h @@ -0,0 +1,6 @@ +#ifndef __UM_BUGS_H +#define __UM_BUGS_H + +void check_bugs(void); + +#endif diff --git a/arch/um/include/asm/cache.h b/arch/um/include/asm/cache.h new file mode 100644 index 0000000..19e1bdd --- /dev/null +++ b/arch/um/include/asm/cache.h @@ -0,0 +1,17 @@ +#ifndef __UM_CACHE_H +#define __UM_CACHE_H + + +#if defined(CONFIG_UML_X86) && !defined(CONFIG_64BIT) +# define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) +#elif defined(CONFIG_UML_X86) /* 64-bit */ +# define L1_CACHE_SHIFT 6 /* Should be 7 on Intel */ +#else +/* XXX: this was taken from x86, now it's completely random. Luckily only + * affects SMP padding. 
*/ +# define L1_CACHE_SHIFT 5 +#endif + +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#endif diff --git a/arch/um/include/asm/checksum.h b/arch/um/include/asm/checksum.h new file mode 100644 index 0000000..5b50136 --- /dev/null +++ b/arch/um/include/asm/checksum.h @@ -0,0 +1,6 @@ +#ifndef __UM_CHECKSUM_H +#define __UM_CHECKSUM_H + +#include "sysdep/checksum.h" + +#endif diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S new file mode 100644 index 0000000..cb02486 --- /dev/null +++ b/arch/um/include/asm/common.lds.S @@ -0,0 +1,130 @@ +#include <asm-generic/vmlinux.lds.h> + + .fini : { *(.fini) } =0x9090 + _etext = .; + PROVIDE (etext = .); + + . = ALIGN(4096); + _sdata = .; + PROVIDE (sdata = .); + + RODATA + + .unprotected : { *(.unprotected) } + . = ALIGN(4096); + PROVIDE (_unprotected_end = .); + + . = ALIGN(4096); + .note : { *(.note.*) } + __ex_table : { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } + + BUG_TABLE + + .uml.setup.init : { + __uml_setup_start = .; + *(.uml.setup.init) + __uml_setup_end = .; + } + + .uml.help.init : { + __uml_help_start = .; + *(.uml.help.init) + __uml_help_end = .; + } + + .uml.postsetup.init : { + __uml_postsetup_start = .; + *(.uml.postsetup.init) + __uml_postsetup_end = .; + } + + .init.setup : { + __setup_start = .; + *(.init.setup) + __setup_end = .; + } + + . = ALIGN(32); + .data.percpu : { + __per_cpu_start = . ; + *(.data.percpu) + __per_cpu_end = . 
; + } + + .initcall.init : { + __initcall_start = .; + INITCALLS + __initcall_end = .; + } + + .con_initcall.init : { + __con_initcall_start = .; + *(.con_initcall.init) + __con_initcall_end = .; + } + + .uml.initcall.init : { + __uml_initcall_start = .; + *(.uml.initcall.init) + __uml_initcall_end = .; + } + __init_end = .; + + SECURITY_INIT + + .exitcall : { + __exitcall_begin = .; + *(.exitcall.exit) + __exitcall_end = .; + } + + .uml.exitcall : { + __uml_exitcall_begin = .; + *(.uml.exitcall.exit) + __uml_exitcall_end = .; + } + + . = ALIGN(4); + .altinstructions : { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + .altinstr_replacement : { *(.altinstr_replacement) } + /* .exit.text is discard at runtime, not link time, to deal with references + from .altinstructions and .eh_frame */ + .exit.text : { *(.exit.text) } + .exit.data : { *(.exit.data) } + + .preinit_array : { + __preinit_array_start = .; + *(.preinit_array) + __preinit_array_end = .; + } + .init_array : { + __init_array_start = .; + *(.init_array) + __init_array_end = .; + } + .fini_array : { + __fini_array_start = .; + *(.fini_array) + __fini_array_end = .; + } + + . 
= ALIGN(4096); + .init.ramfs : { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } + + /* Sections to be discarded */ + /DISCARD/ : { + *(.exitcall.exit) + } + diff --git a/arch/um/include/asm/cputime.h b/arch/um/include/asm/cputime.h new file mode 100644 index 0000000..c84acba --- /dev/null +++ b/arch/um/include/asm/cputime.h @@ -0,0 +1,6 @@ +#ifndef __UM_CPUTIME_H +#define __UM_CPUTIME_H + +#include <asm-generic/cputime.h> + +#endif /* __UM_CPUTIME_H */ diff --git a/arch/um/include/asm/current.h b/arch/um/include/asm/current.h new file mode 100644 index 0000000..c2191d9 --- /dev/null +++ b/arch/um/include/asm/current.h @@ -0,0 +1,13 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __UM_CURRENT_H +#define __UM_CURRENT_H + +#include "linux/thread_info.h" + +#define current (current_thread_info()->task) + +#endif diff --git a/arch/um/include/asm/delay.h b/arch/um/include/asm/delay.h new file mode 100644 index 0000000..c71e32b --- /dev/null +++ b/arch/um/include/asm/delay.h @@ -0,0 +1,20 @@ +#ifndef __UM_DELAY_H +#define __UM_DELAY_H + +#define MILLION 1000000 + +/* Undefined on purpose */ +extern void __bad_udelay(void); + +extern void __udelay(unsigned long usecs); +extern void __delay(unsigned long loops); + +#define udelay(n) ((__builtin_constant_p(n) && (n) > 20000) ? \ + __bad_udelay() : __udelay(n)) + +/* It appears that ndelay is not used at all for UML, and has never been + * implemented. */ +extern void __unimplemented_ndelay(void); +#define ndelay(n) __unimplemented_ndelay() + +#endif diff --git a/arch/um/include/asm/desc.h b/arch/um/include/asm/desc.h new file mode 100644 index 0000000..4ec34a5 --- /dev/null +++ b/arch/um/include/asm/desc.h @@ -0,0 +1,16 @@ +#ifndef __UM_DESC_H +#define __UM_DESC_H + +/* Taken from asm-i386/desc.h, it's the only thing we need. The rest wouldn't + * compile, and has never been used. 
*/ +#define LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->useable == 0 ) + +#endif diff --git a/arch/um/include/asm/device.h b/arch/um/include/asm/device.h new file mode 100644 index 0000000..d8f9872 --- /dev/null +++ b/arch/um/include/asm/device.h @@ -0,0 +1,7 @@ +/* + * Arch specific extensions to struct device + * + * This file is released under the GPLv2 + */ +#include <asm-generic/device.h> + diff --git a/arch/um/include/asm/dma-mapping.h b/arch/um/include/asm/dma-mapping.h new file mode 100644 index 0000000..90fc708 --- /dev/null +++ b/arch/um/include/asm/dma-mapping.h @@ -0,0 +1,128 @@ +#ifndef _ASM_DMA_MAPPING_H +#define _ASM_DMA_MAPPING_H + +#include <asm/scatterlist.h> + +static inline int +dma_supported(struct device *dev, u64 mask) +{ + BUG(); + return(0); +} + +static inline int +dma_set_mask(struct device *dev, u64 dma_mask) +{ + BUG(); + return(0); +} + +static inline void * +dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t flag) +{ + BUG(); + return((void *) 0); +} + +static inline void +dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle) +{ + BUG(); +} + +static inline dma_addr_t +dma_map_single(struct device *dev, void *cpu_addr, size_t size, + enum dma_data_direction direction) +{ + BUG(); + return(0); +} + +static inline void +dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction direction) +{ + BUG(); +} + +static inline dma_addr_t +dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + BUG(); + return(0); +} + +static inline void +dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, + enum dma_data_direction direction) +{ + BUG(); +} 
+ +static inline int +dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + BUG(); + return(0); +} + +static inline void +dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, + enum dma_data_direction direction) +{ + BUG(); +} + +static inline void +dma_sync_single(struct device *dev, dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction) +{ + BUG(); +} + +static inline void +dma_sync_sg(struct device *dev, struct scatterlist *sg, int nelems, + enum dma_data_direction direction) +{ + BUG(); +} + +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) +#define dma_is_consistent(d, h) (1) + +static inline int +dma_get_cache_alignment(void) +{ + BUG(); + return(0); +} + +static inline void +dma_sync_single_range(struct device *dev, dma_addr_t dma_handle, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + BUG(); +} + +static inline void +dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction) +{ + BUG(); +} + +static inline int +dma_mapping_error(struct device *dev, dma_addr_t dma_handle) +{ + BUG(); + return 0; +} + +#endif diff --git a/arch/um/include/asm/dma.h b/arch/um/include/asm/dma.h new file mode 100644 index 0000000..9f6139a --- /dev/null +++ b/arch/um/include/asm/dma.h @@ -0,0 +1,10 @@ +#ifndef __UM_DMA_H +#define __UM_DMA_H + +#include "asm/io.h" + +extern unsigned long uml_physmem; + +#define MAX_DMA_ADDRESS (uml_physmem) + +#endif diff --git a/arch/um/include/asm/emergency-restart.h b/arch/um/include/asm/emergency-restart.h new file mode 100644 index 0000000..108d8c4 --- /dev/null +++ b/arch/um/include/asm/emergency-restart.h @@ -0,0 +1,6 @@ +#ifndef _ASM_EMERGENCY_RESTART_H +#define _ASM_EMERGENCY_RESTART_H + +#include <asm-generic/emergency-restart.h> + +#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git 
a/arch/um/include/asm/fixmap.h b/arch/um/include/asm/fixmap.h
new file mode 100644
index 0000000..69c0252
--- /dev/null
+++ b/arch/um/include/asm/fixmap.h
@@ -0,0 +1,99 @@
+#ifndef __UM_FIXMAP_H
+#define __UM_FIXMAP_H
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/kmap_types.h>
+#include <asm/archparam.h>
+#include <asm/page.h>
+#include <linux/threads.h>
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process. We allocate these special addresses
+ * from the end of virtual memory (0xfffff000) backwards.
+ * Also this lets us do fail-safe vmalloc(), we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * These 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages (or larger if used with an increment
+ * higher than 1). Use set_fixmap(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ */
+
+/*
+ * On UP currently we will have no trace of the fixmap mechanism,
+ * no page table allocations, etc. This might change in the
+ * future, say framebuffers for the console driver(s) could be
+ * fix-mapped?
+ */
+enum fixed_addresses {
+#ifdef CONFIG_HIGHMEM
+	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
+	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+#endif
+	__end_of_fixed_addresses
+};
+
+extern void __set_fixmap (enum fixed_addresses idx,
+			  unsigned long phys, pgprot_t flags);
+
+#define set_fixmap(idx, phys) \
+		__set_fixmap(idx, phys, PAGE_KERNEL)
+/*
+ * Some hardware wants to get fixmapped without caching.
+ */
+#define set_fixmap_nocache(idx, phys) \
+		__set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
+/*
+ * Used by vmalloc.c.
+ *
+ * Leave one empty page between vmalloc'ed areas and
+ * the start of the fixmap, and leave one page empty
+ * at the top of mem.
+ */
+
+#define FIXADDR_TOP	(TASK_SIZE - 2 * PAGE_SIZE)
+#define FIXADDR_SIZE	(__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)
+
+#define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
+#define __virt_to_fix(x)	((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
+
+extern void __this_fixmap_does_not_exist(void);
+
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without translation, we catch the bug with a NULL-dereference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static inline unsigned long fix_to_virt(const unsigned int idx)
+{
+	/*
+	 * This branch gets completely eliminated after inlining,
+	 * except when someone tries to use fixaddr indices in an
+	 * illegal way (such as mixing up address types or using
+	 * out-of-range indices).
+	 *
+	 * If it doesn't get removed, the linker will complain
+	 * loudly with a reasonably clear error message.
+ */ + if (idx >= __end_of_fixed_addresses) + __this_fixmap_does_not_exist(); + + return __fix_to_virt(idx); +} + +static inline unsigned long virt_to_fix(const unsigned long vaddr) +{ + BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); + return __virt_to_fix(vaddr); +} + +#endif diff --git a/arch/um/include/asm/futex.h b/arch/um/include/asm/futex.h new file mode 100644 index 0000000..6a332a9 --- /dev/null +++ b/arch/um/include/asm/futex.h @@ -0,0 +1,6 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#include <asm-generic/futex.h> + +#endif diff --git a/arch/um/include/asm/hardirq.h b/arch/um/include/asm/hardirq.h new file mode 100644 index 0000000..313ebb8 --- /dev/null +++ b/arch/um/include/asm/hardirq.h @@ -0,0 +1,25 @@ +/* (c) 2004 cw@f00f.org, GPLv2 blah blah */ + +#ifndef __ASM_UM_HARDIRQ_H +#define __ASM_UM_HARDIRQ_H + +#include <linux/threads.h> +#include <linux/irq.h> + +/* NOTE: When SMP works again we might want to make this + * ____cacheline_aligned or maybe use per_cpu state? --cw */ +typedef struct { + unsigned int __softirq_pending; +} irq_cpustat_t; + +#include <linux/irq_cpustat.h> + +/* As this would be very strange for UML to get we BUG() after the + * printk. 
*/ +static inline void ack_bad_irq(unsigned int irq) +{ + printk(KERN_ERR "unexpected IRQ %02x\n", irq); + BUG(); +} + +#endif /* __ASM_UM_HARDIRQ_H */ diff --git a/arch/um/include/asm/hw_irq.h b/arch/um/include/asm/hw_irq.h new file mode 100644 index 0000000..1cf84cf --- /dev/null +++ b/arch/um/include/asm/hw_irq.h @@ -0,0 +1,7 @@ +#ifndef _ASM_UM_HW_IRQ_H +#define _ASM_UM_HW_IRQ_H + +#include "asm/irq.h" +#include "asm/archparam.h" + +#endif diff --git a/arch/um/include/asm/io.h b/arch/um/include/asm/io.h new file mode 100644 index 0000000..44e8b8c --- /dev/null +++ b/arch/um/include/asm/io.h @@ -0,0 +1,57 @@ +#ifndef __UM_IO_H +#define __UM_IO_H + +#include "asm/page.h" + +#define IO_SPACE_LIMIT 0xdeadbeef /* Sure hope nothing uses this */ + +static inline int inb(unsigned long i) { return(0); } +static inline void outb(char c, unsigned long i) { } + +/* + * Change virtual addresses to physical addresses and vv. + * These are pretty trivial + */ +static inline unsigned long virt_to_phys(volatile void * address) +{ + return __pa((void *) address); +} + +static inline void * phys_to_virt(unsigned long address) +{ + return __va(address); +} + +/* + * Convert a physical pointer to a virtual kernel pointer for /dev/mem + * access + */ +#define xlate_dev_mem_ptr(p) __va(p) + +/* + * Convert a virtual cached pointer to an uncached pointer + */ +#define xlate_dev_kmem_ptr(p) p + +static inline void writeb(unsigned char b, volatile void __iomem *addr) +{ + *(volatile unsigned char __force *) addr = b; +} +static inline void writew(unsigned short b, volatile void __iomem *addr) +{ + *(volatile unsigned short __force *) addr = b; +} +static inline void writel(unsigned int b, volatile void __iomem *addr) +{ + *(volatile unsigned int __force *) addr = b; +} +static inline void writeq(unsigned int b, volatile void __iomem *addr) +{ + *(volatile unsigned long long __force *) addr = b; +} +#define __raw_writeb writeb +#define __raw_writew writew +#define __raw_writel writel 
+#define __raw_writeq writeq + +#endif diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h new file mode 100644 index 0000000..4a2037f --- /dev/null +++ b/arch/um/include/asm/irq.h @@ -0,0 +1,23 @@ +#ifndef __UM_IRQ_H +#define __UM_IRQ_H + +#define TIMER_IRQ 0 +#define UMN_IRQ 1 +#define CONSOLE_IRQ 2 +#define CONSOLE_WRITE_IRQ 3 +#define UBD_IRQ 4 +#define UM_ETH_IRQ 5 +#define SSL_IRQ 6 +#define SSL_WRITE_IRQ 7 +#define ACCEPT_IRQ 8 +#define MCONSOLE_IRQ 9 +#define WINCH_IRQ 10 +#define SIGIO_WRITE_IRQ 11 +#define TELNETD_IRQ 12 +#define XTERM_IRQ 13 +#define RANDOM_IRQ 14 + +#define LAST_IRQ RANDOM_IRQ +#define NR_IRQS (LAST_IRQ + 1) + +#endif diff --git a/arch/um/include/asm/irq_regs.h b/arch/um/include/asm/irq_regs.h new file mode 100644 index 0000000..3dd9c0b --- /dev/null +++ b/arch/um/include/asm/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/arch/um/include/asm/irq_vectors.h b/arch/um/include/asm/irq_vectors.h new file mode 100644 index 0000000..62ddba6 --- /dev/null +++ b/arch/um/include/asm/irq_vectors.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __UM_IRQ_VECTORS_H +#define __UM_IRQ_VECTORS_H + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/include/asm/irqflags.h b/arch/um/include/asm/irqflags.h new file mode 100644 index 0000000..659b9ab --- /dev/null +++ b/arch/um/include/asm/irqflags.h @@ -0,0 +1,6 @@ +#ifndef __UM_IRQFLAGS_H +#define __UM_IRQFLAGS_H + +/* Empty for now */ + +#endif diff --git a/arch/um/include/asm/kdebug.h b/arch/um/include/asm/kdebug.h new file mode 100644 index 0000000..6ece1b0 --- /dev/null +++ b/arch/um/include/asm/kdebug.h @@ -0,0 +1 @@ +#include <asm-generic/kdebug.h> diff --git a/arch/um/include/asm/kmap_types.h b/arch/um/include/asm/kmap_types.h new file mode 100644 index 0000000..6c03acd --- /dev/null +++ b/arch/um/include/asm/kmap_types.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __UM_KMAP_TYPES_H +#define __UM_KMAP_TYPES_H + +/* No more #include "asm/arch/kmap_types.h" ! */ + +enum km_type { + KM_BOUNCE_READ, + KM_SKB_SUNRPC_DATA, + KM_SKB_DATA_SOFTIRQ, + KM_USER0, + KM_USER1, + KM_UML_USERCOPY, /* UML specific, for copy_*_user - used in do_op_one_page */ + KM_BIO_SRC_IRQ, + KM_BIO_DST_IRQ, + KM_PTE0, + KM_PTE1, + KM_IRQ0, + KM_IRQ1, + KM_SOFTIRQ0, + KM_SOFTIRQ1, + KM_TYPE_NR +}; + +#endif diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h new file mode 100644 index 0000000..2cf35c2 --- /dev/null +++ b/arch/um/include/asm/mmu.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __MMU_H +#define __MMU_H + +#include "um_mmu.h" + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h new file mode 100644 index 0000000..54f42e8 --- /dev/null +++ b/arch/um/include/asm/mmu_context.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __UM_MMU_CONTEXT_H +#define __UM_MMU_CONTEXT_H + +#include "linux/sched.h" +#include "um_mmu.h" + +extern void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); +extern void arch_exit_mmap(struct mm_struct *mm); + +#define get_mmu_context(task) do ; while(0) +#define activate_context(tsk) do ; while(0) + +#define deactivate_mm(tsk,mm) do { } while (0) + +extern void force_flush_all(void); + +static inline void activate_mm(struct mm_struct *old, struct mm_struct *new) +{ + /* + * This is called by fs/exec.c and sys_unshare() + * when the new ->mm is used for the first time. + */ + __switch_mm(&new->context.id); + arch_dup_mmap(old, new); +} + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + unsigned cpu = smp_processor_id(); + + if(prev != next){ + cpu_clear(cpu, prev->cpu_vm_mask); + cpu_set(cpu, next->cpu_vm_mask); + if(next != &init_mm) + __switch_mm(&next->context.id); + } +} + +static inline void enter_lazy_tlb(struct mm_struct *mm, + struct task_struct *tsk) +{ +} + +extern int init_new_context(struct task_struct *task, struct mm_struct *mm); + +extern void destroy_context(struct mm_struct *mm); + +#endif diff --git a/arch/um/include/asm/mutex.h b/arch/um/include/asm/mutex.h new file mode 100644 index 0000000..458c1f7 --- /dev/null +++ b/arch/um/include/asm/mutex.h @@ -0,0 +1,9 @@ +/* + * Pull in the generic implementation for the mutex fastpath. 
+ * + * TODO: implement optimized primitives instead, or leave the generic + * implementation in place, or pick the atomic_xchg() based generic + * implementation. (see asm-generic/mutex-xchg.h for details) + */ + +#include <asm-generic/mutex-dec.h> diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h new file mode 100644 index 0000000..55f28a0 --- /dev/null +++ b/arch/um/include/asm/page.h @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Copyright 2003 PathScale, Inc. + * Licensed under the GPL + */ + +#ifndef __UM_PAGE_H +#define __UM_PAGE_H + +#include <linux/const.h> + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#ifndef __ASSEMBLY__ + +struct page; + +#include <linux/types.h> +#include <sysdep/vm-flags.h> + +/* + * These are used to make use of C type-checking.. + */ + +#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) +#define copy_page(to,from) memcpy((void *)(to), (void *)(from), PAGE_SIZE) + +#define clear_user_page(page, vaddr, pg) clear_page(page) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +#if defined(CONFIG_3_LEVEL_PGTABLES) && !defined(CONFIG_64BIT) + +typedef struct { unsigned long pte_low, pte_high; } pte_t; +typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pgd; } pgd_t; +#define pte_val(x) ((x).pte_low | ((unsigned long long) (x).pte_high << 32)) + +#define pte_get_bits(pte, bits) ((pte).pte_low & (bits)) +#define pte_set_bits(pte, bits) ((pte).pte_low |= (bits)) +#define pte_clear_bits(pte, bits) ((pte).pte_low &= ~(bits)) +#define pte_copy(to, from) ({ (to).pte_high = (from).pte_high; \ + smp_wmb(); \ + (to).pte_low = (from).pte_low; }) +#define pte_is_zero(pte) (!((pte).pte_low & ~_PAGE_NEWPAGE) && !(pte).pte_high) +#define pte_set_val(pte, phys, prot) \ + ({ (pte).pte_high = (phys) >> 32; \ + (pte).pte_low = (phys) | 
pgprot_val(prot); }) + +#define pmd_val(x) ((x).pmd) +#define __pmd(x) ((pmd_t) { (x) } ) + +typedef unsigned long long pfn_t; +typedef unsigned long long phys_t; + +#else + +typedef struct { unsigned long pte; } pte_t; +typedef struct { unsigned long pgd; } pgd_t; + +#ifdef CONFIG_3_LEVEL_PGTABLES +typedef struct { unsigned long pmd; } pmd_t; +#define pmd_val(x) ((x).pmd) +#define __pmd(x) ((pmd_t) { (x) } ) +#endif + +#define pte_val(x) ((x).pte) + + +#define pte_get_bits(p, bits) ((p).pte & (bits)) +#define pte_set_bits(p, bits) ((p).pte |= (bits)) +#define pte_clear_bits(p, bits) ((p).pte &= ~(bits)) +#define pte_copy(to, from) ((to).pte = (from).pte) +#define pte_is_zero(p) (!((p).pte & ~_PAGE_NEWPAGE)) +#define pte_set_val(p, phys, prot) (p).pte = (phys | pgprot_val(prot)) + +typedef unsigned long pfn_t; +typedef unsigned long phys_t; + +#endif + +typedef struct { unsigned long pgprot; } pgprot_t; + +typedef struct page *pgtable_t; + +#define pgd_val(x) ((x).pgd) +#define pgprot_val(x) ((x).pgprot) + +#define __pte(x) ((pte_t) { (x) } ) +#define __pgd(x) ((pgd_t) { (x) } ) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +extern unsigned long uml_physmem; + +#define PAGE_OFFSET (uml_physmem) +#define KERNELBASE PAGE_OFFSET + +#define __va_space (8*1024*1024) + +#include "mem.h" + +/* Cast to unsigned long before casting to void * to avoid a warning from + * mmap_kmem about cutting a long long down to a void *. 
Not sure that + * casting is the right thing, but 32-bit UML can't have 64-bit virtual + * addresses + */ +#define __pa(virt) to_phys((void *) (unsigned long) (virt)) +#define __va(phys) to_virt((unsigned long) (phys)) + +#define phys_to_pfn(p) ((pfn_t) ((p) >> PAGE_SHIFT)) +#define pfn_to_phys(pfn) ((phys_t) ((pfn) << PAGE_SHIFT)) + +#define pfn_valid(pfn) ((pfn) < max_mapnr) +#define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v))) + +#include <asm-generic/memory_model.h> +#include <asm-generic/page.h> + +#endif /* __ASSEMBLY__ */ +#endif /* __UM_PAGE_H */ diff --git a/arch/um/include/asm/page_offset.h b/arch/um/include/asm/page_offset.h new file mode 100644 index 0000000..1c168df --- /dev/null +++ b/arch/um/include/asm/page_offset.h @@ -0,0 +1 @@ +#define PAGE_OFFSET_RAW (uml_physmem) diff --git a/arch/um/include/asm/param.h b/arch/um/include/asm/param.h new file mode 100644 index 0000000..e44f4e6 --- /dev/null +++ b/arch/um/include/asm/param.h @@ -0,0 +1,20 @@ +#ifndef _UM_PARAM_H +#define _UM_PARAM_H + +#define EXEC_PAGESIZE 4096 + +#ifndef NOGROUP +#define NOGROUP (-1) +#endif + +#define MAXHOSTNAMELEN 64 /* max length of hostname */ + +#ifdef __KERNEL__ +#define HZ CONFIG_HZ +#define USER_HZ 100 /* .. some user interfaces are in "ticks" */ +#define CLOCKS_PER_SEC (USER_HZ) /* frequency at which times() counts */ +#else +#define HZ 100 +#endif + +#endif diff --git a/arch/um/include/asm/pci.h b/arch/um/include/asm/pci.h new file mode 100644 index 0000000..5992319 --- /dev/null +++ b/arch/um/include/asm/pci.h @@ -0,0 +1,7 @@ +#ifndef __UM_PCI_H +#define __UM_PCI_H + +#define PCI_DMA_BUS_IS_PHYS (1) +#define pcibios_scan_all_fns(a, b) 0 + +#endif diff --git a/arch/um/include/asm/pda.h b/arch/um/include/asm/pda.h new file mode 100644 index 0000000..0d8bf33 --- /dev/null +++ b/arch/um/include/asm/pda.h @@ -0,0 +1,31 @@ +/* + * Copyright 2003 PathScale, Inc. 
+ * + * Licensed under the GPL + */ + +#ifndef __UM_PDA_X86_64_H +#define __UM_PDA_X86_64_H + +/* XXX */ +struct foo { + unsigned int __softirq_pending; + unsigned int __nmi_count; +}; + +extern struct foo me; + +#define read_pda(me) (&me) + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h new file mode 100644 index 0000000..9062a6e --- /dev/null +++ b/arch/um/include/asm/pgalloc.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright 2003 PathScale, Inc. + * Derived from include/asm-i386/pgalloc.h and include/asm-i386/pgtable.h + * Licensed under the GPL + */ + +#ifndef __UM_PGALLOC_H +#define __UM_PGALLOC_H + +#include "linux/mm.h" +#include "asm/fixmap.h" + +#define pmd_populate_kernel(mm, pmd, pte) \ + set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) __pa(pte))) + +#define pmd_populate(mm, pmd, pte) \ + set_pmd(pmd, __pmd(_PAGE_TABLE + \ + ((unsigned long long)page_to_pfn(pte) << \ + (unsigned long long) PAGE_SHIFT))) +#define pmd_pgtable(pmd) pmd_page(pmd) + +/* + * Allocate and free page tables. 
+ */ +extern pgd_t *pgd_alloc(struct mm_struct *); +extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); + +extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long); +extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long); + +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +{ + free_page((unsigned long) pte); +} + +static inline void pte_free(struct mm_struct *mm, pgtable_t pte) +{ + pgtable_page_dtor(pte); + __free_page(pte); +} + +#define __pte_free_tlb(tlb,pte) \ +do { \ + pgtable_page_dtor(pte); \ + tlb_remove_page((tlb),(pte)); \ +} while (0) + +#ifdef CONFIG_3_LEVEL_PGTABLES + +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) +{ + free_page((unsigned long)pmd); +} + +#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) +#endif + +#define check_pgt_cache() do { } while (0) + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h new file mode 100644 index 0000000..f534b73 --- /dev/null +++ b/arch/um/include/asm/pgtable-2level.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright 2003 PathScale, Inc. 
+ * Derived from include/asm-i386/pgtable.h + * Licensed under the GPL + */ + +#ifndef __UM_PGTABLE_2LEVEL_H +#define __UM_PGTABLE_2LEVEL_H + +#include <asm-generic/pgtable-nopmd.h> + +/* PGDIR_SHIFT determines what a third-level page table entry can map */ + +#define PGDIR_SHIFT 22 +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* + * entries per page directory level: the i386 is two-level, so + * we don't really have any PMD directory physically. + */ +#define PTRS_PER_PTE 1024 +#define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE) +#define PTRS_PER_PGD 1024 +#define FIRST_USER_ADDRESS 0 + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %p(%08lx).\n", __FILE__, __LINE__, &(e), \ + pte_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %p(%08lx).\n", __FILE__, __LINE__, &(e), \ + pgd_val(e)) + +static inline int pgd_newpage(pgd_t pgd) { return 0; } +static inline void pgd_mkuptodate(pgd_t pgd) { } + +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) + +#define pte_pfn(x) phys_to_pfn(pte_val(x)) +#define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot)) +#define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot)) + +/* + * Bits 0 through 4 are taken + */ +#define PTE_FILE_MAX_BITS 27 + +#define pte_to_pgoff(pte) (pte_val(pte) >> 5) + +#define pgoff_to_pte(off) ((pte_t) { ((off) << 5) + _PAGE_FILE }) + +#endif diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h new file mode 100644 index 0000000..0446f45 --- /dev/null +++ b/arch/um/include/asm/pgtable-3level.h @@ -0,0 +1,146 @@ +/* + * Copyright 2003 PathScale Inc + * Derived from include/asm-i386/pgtable.h + * Licensed under the GPL + */ + +#ifndef __UM_PGTABLE_3LEVEL_H +#define __UM_PGTABLE_3LEVEL_H + +#include <asm-generic/pgtable-nopud.h> + +/* PGDIR_SHIFT determines what a third-level page table entry can map */ + +#ifdef CONFIG_64BIT +#define PGDIR_SHIFT 30 +#else +#define PGDIR_SHIFT 31 
+#endif
+#define PGDIR_SIZE		(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK		(~(PGDIR_SIZE-1))
+
+/* PMD_SHIFT determines the size of the area a second-level page table can
+ * map
+ */
+
+#define PMD_SHIFT		21
+#define PMD_SIZE		(1UL << PMD_SHIFT)
+#define PMD_MASK		(~(PMD_SIZE-1))
+
+/*
+ * entries per page directory level
+ */
+
+#define PTRS_PER_PTE 512
+#ifdef CONFIG_64BIT
+#define PTRS_PER_PMD 512
+#define PTRS_PER_PGD 512
+#else
+#define PTRS_PER_PMD 1024
+#define PTRS_PER_PGD 1024
+#endif
+
+#define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE)
+#define FIRST_USER_ADDRESS	0
+
+#define pte_ERROR(e) \
+	printk("%s:%d: bad pte %p(%016llx).\n", __FILE__, __LINE__, &(e), \
+	       (unsigned long long) pte_val(e)) /* pte_val() is 64-bit even on 32-bit 3-level builds */
+#define pmd_ERROR(e) \
+	printk("%s:%d: bad pmd %p(%016lx).\n", __FILE__, __LINE__, &(e), \
+	       pmd_val(e))
+#define pgd_ERROR(e) \
+	printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), \
+	       pgd_val(e))
+
+#define pud_none(x)	(!(pud_val(x) & ~_PAGE_NEWPAGE))
+#define	pud_bad(x)	((pud_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+#define pud_present(x)	(pud_val(x) & _PAGE_PRESENT)
+#define pud_populate(mm, pud, pmd) \
+	set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd)))
+
+#ifdef CONFIG_64BIT
+#define set_pud(pudptr, pudval) set_64bit((phys_t *) (pudptr), pud_val(pudval))
+#else
+#define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
+#endif
+
+static inline int pgd_newpage(pgd_t pgd)
+{
+	return(pgd_val(pgd) & _PAGE_NEWPAGE);
+}
+
+static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; } /* NOTE(review): pgd is passed by value, so only the local copy is changed */
+
+#ifdef CONFIG_64BIT
+#define set_pmd(pmdptr, pmdval) set_64bit((phys_t *) (pmdptr), pmd_val(pmdval))
+#else
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+#endif
+
+struct mm_struct;
+extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address);
+
+static inline void pud_clear (pud_t *pud)
+{
+	set_pud(pud, __pud(_PAGE_NEWPAGE));
+}
+
+#define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK)
+#define pud_page_vaddr(pud) 
((unsigned long) __va(pud_val(pud) & PAGE_MASK)) + +/* Find an entry in the second-level page table.. */ +#define pmd_offset(pud, address) ((pmd_t *) pud_page_vaddr(*(pud)) + \ + pmd_index(address)) + +static inline unsigned long pte_pfn(pte_t pte) +{ + return phys_to_pfn(pte_val(pte)); +} + +static inline pte_t pfn_pte(pfn_t page_nr, pgprot_t pgprot) +{ + pte_t pte; + phys_t phys = pfn_to_phys(page_nr); + + pte_set_val(pte, phys, pgprot); + return pte; +} + +static inline pmd_t pfn_pmd(pfn_t page_nr, pgprot_t pgprot) +{ + return __pmd((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)); +} + +/* + * Bits 0 through 3 are taken in the low part of the pte, + * put the 32 bits of offset into the high part. + */ +#define PTE_FILE_MAX_BITS 32 + +#ifdef CONFIG_64BIT + +#define pte_to_pgoff(p) ((p).pte >> 32) + +#define pgoff_to_pte(off) ((pte_t) { ((off) << 32) | _PAGE_FILE }) + +#else + +#define pte_to_pgoff(pte) ((pte).pte_high) + +#define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) }) + +#endif + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h new file mode 100644 index 0000000..58da248 --- /dev/null +++ b/arch/um/include/asm/pgtable.h @@ -0,0 +1,366 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright 2003 PathScale, Inc. 
+ * Derived from include/asm-i386/pgtable.h + * Licensed under the GPL + */ + +#ifndef __UM_PGTABLE_H +#define __UM_PGTABLE_H + +#include <asm/fixmap.h> + +#define _PAGE_PRESENT 0x001 +#define _PAGE_NEWPAGE 0x002 +#define _PAGE_NEWPROT 0x004 +#define _PAGE_RW 0x020 +#define _PAGE_USER 0x040 +#define _PAGE_ACCESSED 0x080 +#define _PAGE_DIRTY 0x100 +/* If _PAGE_PRESENT is clear, we use these: */ +#define _PAGE_FILE 0x008 /* nonlinear file mapping, saved PTE; unset:swap */ +#define _PAGE_PROTNONE 0x010 /* if the user mapped it with PROT_NONE; + pte_present gives true */ + +#ifdef CONFIG_3_LEVEL_PGTABLES +#include "asm/pgtable-3level.h" +#else +#include "asm/pgtable-2level.h" +#endif + +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; + +/* zero page used for uninitialized stuff */ +extern unsigned long *empty_zero_page; + +#define pgtable_cache_init() do ; while (0) + +/* Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8MB value just means that there will be a 8MB "hole" after the + * physical memory until the kernel virtual memory starts. That means that + * any out-of-bounds memory accesses will hopefully be caught. + * The vmalloc() routines leaves a hole of 4kB between each vmalloced + * area for the same reason. 
;) + */ + +extern unsigned long end_iomem; + +#define VMALLOC_OFFSET (__va_space) +#define VMALLOC_START ((end_iomem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) +#define PKMAP_BASE ((FIXADDR_START - LAST_PKMAP * PAGE_SIZE) & PMD_MASK) +#ifdef CONFIG_HIGHMEM +# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) +#else +# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) +#endif + +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) + +#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) + +/* + * The i386 can't do page protection for execute, and considers that the same + * are read. + * Also, write permissions imply read permissions. This is the closest we can + * get.. + */ +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY +#define __P100 PAGE_READONLY +#define __P101 PAGE_READONLY +#define __P110 PAGE_COPY +#define __P111 PAGE_COPY + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED +#define __S100 PAGE_READONLY +#define __S101 PAGE_READONLY +#define __S110 PAGE_SHARED +#define __S111 PAGE_SHARED + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. 
+ */ +#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) + +#define pte_clear(mm,addr,xp) pte_set_val(*(xp), (phys_t) 0, __pgprot(_PAGE_NEWPAGE)) + +#define pmd_none(x) (!((unsigned long)pmd_val(x) & ~_PAGE_NEWPAGE)) +#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) + +#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) +#define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0) + +#define pmd_newpage(x) (pmd_val(x) & _PAGE_NEWPAGE) +#define pmd_mkuptodate(x) (pmd_val(x) &= ~_PAGE_NEWPAGE) + +#define pud_newpage(x) (pud_val(x) & _PAGE_NEWPAGE) +#define pud_mkuptodate(x) (pud_val(x) &= ~_PAGE_NEWPAGE) + +#define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK) + +#define pte_page(x) pfn_to_page(pte_pfn(x)) + +#define pte_present(x) pte_get_bits(x, (_PAGE_PRESENT | _PAGE_PROTNONE)) + +/* + * ================================= + * Flags checking section. + * ================================= + */ + +static inline int pte_none(pte_t pte) +{ + return pte_is_zero(pte); +} + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +static inline int pte_read(pte_t pte) +{ + return((pte_get_bits(pte, _PAGE_USER)) && + !(pte_get_bits(pte, _PAGE_PROTNONE))); +} + +static inline int pte_exec(pte_t pte){ + return((pte_get_bits(pte, _PAGE_USER)) && + !(pte_get_bits(pte, _PAGE_PROTNONE))); +} + +static inline int pte_write(pte_t pte) +{ + return((pte_get_bits(pte, _PAGE_RW)) && + !(pte_get_bits(pte, _PAGE_PROTNONE))); +} + +/* + * The following only works if pte_present() is not true. 
+ */ +static inline int pte_file(pte_t pte) +{ + return pte_get_bits(pte, _PAGE_FILE); +} + +static inline int pte_dirty(pte_t pte) +{ + return pte_get_bits(pte, _PAGE_DIRTY); +} + +static inline int pte_young(pte_t pte) +{ + return pte_get_bits(pte, _PAGE_ACCESSED); +} + +static inline int pte_newpage(pte_t pte) +{ + return pte_get_bits(pte, _PAGE_NEWPAGE); +} + +static inline int pte_newprot(pte_t pte) +{ + return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT))); +} + +static inline int pte_special(pte_t pte) +{ + return 0; +} + +/* + * ================================= + * Flags setting section. + * ================================= + */ + +static inline pte_t pte_mknewprot(pte_t pte) +{ + pte_set_bits(pte, _PAGE_NEWPROT); + return(pte); +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + pte_clear_bits(pte, _PAGE_DIRTY); + return(pte); +} + +static inline pte_t pte_mkold(pte_t pte) +{ + pte_clear_bits(pte, _PAGE_ACCESSED); + return(pte); +} + +static inline pte_t pte_wrprotect(pte_t pte) +{ + pte_clear_bits(pte, _PAGE_RW); + return(pte_mknewprot(pte)); +} + +static inline pte_t pte_mkread(pte_t pte) +{ + pte_set_bits(pte, _PAGE_USER); + return(pte_mknewprot(pte)); +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + pte_set_bits(pte, _PAGE_DIRTY); + return(pte); +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + pte_set_bits(pte, _PAGE_ACCESSED); + return(pte); +} + +static inline pte_t pte_mkwrite(pte_t pte) +{ + pte_set_bits(pte, _PAGE_RW); + return(pte_mknewprot(pte)); +} + +static inline pte_t pte_mkuptodate(pte_t pte) +{ + pte_clear_bits(pte, _PAGE_NEWPAGE); + if(pte_present(pte)) + pte_clear_bits(pte, _PAGE_NEWPROT); + return(pte); +} + +static inline pte_t pte_mknewpage(pte_t pte) +{ + pte_set_bits(pte, _PAGE_NEWPAGE); + return(pte); +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + return(pte); +} + +static inline void set_pte(pte_t *pteptr, pte_t pteval) +{ + pte_copy(*pteptr, pteval); + + /* If it's a swap entry, it needs to be 
marked _PAGE_NEWPAGE so + * fix_range knows to unmap it. _PAGE_NEWPROT is specific to + * mapped pages. + */ + + *pteptr = pte_mknewpage(*pteptr); + if(pte_present(*pteptr)) *pteptr = pte_mknewprot(*pteptr); +} +#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ + +#define phys_to_page(phys) pfn_to_page(phys_to_pfn(phys)) +#define __virt_to_page(virt) phys_to_page(__pa(virt)) +#define page_to_phys(page) pfn_to_phys((pfn_t) page_to_pfn(page)) +#define virt_to_page(addr) __virt_to_page((const unsigned long) addr) + +#define mk_pte(page, pgprot) \ + ({ pte_t pte; \ + \ + pte_set_val(pte, page_to_phys(page), (pgprot)); \ + if (pte_present(pte)) \ + pte_mknewprot(pte_mknewpage(pte)); \ + pte;}) + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + pte_set_val(pte, (pte_val(pte) & _PAGE_CHG_MASK), newprot); + return pte; +} + +/* + * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] + * + * this macro returns the index of the entry in the pgd page which would + * control the given virtual address + */ +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) + +/* + * pgd_offset() returns a (pgd_t *) + * pgd_index() is used get the offset into the pgd page's array of pgd_t's; + */ +#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address)) + +/* + * a shortcut which implies the use of the kernel's pgd, instead + * of a process's + */ +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +/* + * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] + * + * this macro returns the index of the entry in the pmd page which would + * control the given virtual address + */ +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) +#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) + +#define 
pmd_page_vaddr(pmd) \ + ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) + +/* + * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] + * + * this macro returns the index of the entry in the pte page which would + * control the given virtual address + */ +#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset_kernel(dir, address) \ + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address)) +#define pte_offset_map(dir, address) \ + ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address)) +#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address) +#define pte_unmap(pte) do { } while (0) +#define pte_unmap_nested(pte) do { } while (0) + +struct mm_struct; +extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); + +#define update_mmu_cache(vma,address,pte) do ; while (0) + +/* Encode and de-code a swap entry */ +#define __swp_type(x) (((x).val >> 4) & 0x3f) +#define __swp_offset(x) ((x).val >> 11) + +#define __swp_entry(type, offset) \ + ((swp_entry_t) { ((type) << 4) | ((offset) << 11) }) +#define __pte_to_swp_entry(pte) \ + ((swp_entry_t) { pte_val(pte_mkuptodate(pte)) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +#define kern_addr_valid(addr) (1) + +#include <asm-generic/pgtable.h> + +/* Clear a kernel PTE and flush it from the TLB */ +#define kpte_clear_flush(ptep, vaddr) \ +do { \ + pte_clear(&init_mm, (vaddr), (ptep)); \ + __flush_tlb_one((vaddr)); \ +} while (0) + +#endif diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h new file mode 100644 index 0000000..bed6688 --- /dev/null +++ b/arch/um/include/asm/processor-generic.h @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __UM_PROCESSOR_GENERIC_H +#define __UM_PROCESSOR_GENERIC_H + +struct pt_regs; + +struct task_struct; + +#include "asm/ptrace.h" +#include "registers.h" 
+#include "sysdep/archsetjmp.h" + +struct mm_struct; + +struct thread_struct { + struct task_struct *saved_task; + /* + * This flag is set to 1 before calling do_fork (and analyzed in + * copy_thread) to mark that we are begin called from userspace (fork / + * vfork / clone), and reset to 0 after. It is left to 0 when called + * from kernelspace (i.e. kernel_thread() or fork_idle(), + * as of 2.6.11). + */ + int forking; + struct pt_regs regs; + int singlestep_syscall; + void *fault_addr; + jmp_buf *fault_catcher; + struct task_struct *prev_sched; + unsigned long temp_stack; + jmp_buf *exec_buf; + struct arch_thread arch; + jmp_buf switch_buf; + int mm_count; + struct { + int op; + union { + struct { + int pid; + } fork, exec; + struct { + int (*proc)(void *); + void *arg; + } thread; + struct { + void (*proc)(void *); + void *arg; + } cb; + } u; + } request; +}; + +#define INIT_THREAD \ +{ \ + .forking = 0, \ + .regs = EMPTY_REGS, \ + .fault_addr = NULL, \ + .prev_sched = NULL, \ + .temp_stack = 0, \ + .exec_buf = NULL, \ + .arch = INIT_ARCH_THREAD, \ + .request = { 0 } \ +} + +extern struct task_struct *alloc_task_struct(void); + +static inline void release_thread(struct task_struct *task) +{ +} + +extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); + +static inline void prepare_to_copy(struct task_struct *tsk) +{ +} + + +extern unsigned long thread_saved_pc(struct task_struct *t); + +static inline void mm_copy_segments(struct mm_struct *from_mm, + struct mm_struct *new_mm) +{ +} + +#define init_stack (init_thread_union.stack) + +/* + * User space process size: 3GB (default). + */ +extern unsigned long task_size; + +#define TASK_SIZE (task_size) + +#undef STACK_TOP +#undef STACK_TOP_MAX + +extern unsigned long stacksizelim; + +#define STACK_ROOM (stacksizelim) +#define STACK_TOP (TASK_SIZE - 2 * PAGE_SIZE) +#define STACK_TOP_MAX STACK_TOP + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. 
+ */ +#define TASK_UNMAPPED_BASE (0x40000000) + +extern void start_thread(struct pt_regs *regs, unsigned long entry, + unsigned long stack); + +struct cpuinfo_um { + unsigned long loops_per_jiffy; + int ipi_pipe[2]; +}; + +extern struct cpuinfo_um boot_cpu_data; + +#define my_cpu_data cpu_data[smp_processor_id()] + +#ifdef CONFIG_SMP +extern struct cpuinfo_um cpu_data[]; +#define current_cpu_data cpu_data[smp_processor_id()] +#else +#define cpu_data (&boot_cpu_data) +#define current_cpu_data boot_cpu_data +#endif + + +#define KSTK_REG(tsk, reg) get_thread_reg(reg, &tsk->thread.switch_buf) +extern unsigned long get_wchan(struct task_struct *p); + +#endif diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h new file mode 100644 index 0000000..6c88990 --- /dev/null +++ b/arch/um/include/asm/ptrace-generic.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __UM_PTRACE_GENERIC_H +#define __UM_PTRACE_GENERIC_H + +#ifndef __ASSEMBLY__ + +#include <asm/ptrace-abi.h> +#include <asm/user.h> +#include "sysdep/ptrace.h" + +struct pt_regs { + struct uml_pt_regs regs; +}; + +#define EMPTY_REGS { .regs = EMPTY_UML_PT_REGS } + +#define PT_REGS_IP(r) UPT_IP(&(r)->regs) +#define PT_REGS_SP(r) UPT_SP(&(r)->regs) + +#define PT_REG(r, reg) UPT_REG(&(r)->regs, reg) +#define PT_REGS_SET(r, reg, val) UPT_SET(&(r)->regs, reg, val) + +#define PT_REGS_SET_SYSCALL_RETURN(r, res) \ + UPT_SET_SYSCALL_RETURN(&(r)->regs, res) +#define PT_REGS_RESTART_SYSCALL(r) UPT_RESTART_SYSCALL(&(r)->regs) + +#define PT_REGS_SYSCALL_NR(r) UPT_SYSCALL_NR(&(r)->regs) + +#define PT_REGS_SC(r) UPT_SC(&(r)->regs) + +#define instruction_pointer(regs) PT_REGS_IP(regs) + +struct task_struct; + +extern long subarch_ptrace(struct task_struct *child, long request, long addr, + long data); +extern unsigned long getreg(struct task_struct *child, int regno); +extern int putreg(struct task_struct 
*child, int regno, unsigned long value); +extern int get_fpregs(struct user_i387_struct __user *buf, + struct task_struct *child); +extern int set_fpregs(struct user_i387_struct __user *buf, + struct task_struct *child); + +extern void show_regs(struct pt_regs *regs); + +extern int arch_copy_tls(struct task_struct *new); +extern void clear_flushed_tls(struct task_struct *task); + +#endif + +#endif diff --git a/arch/um/include/asm/required-features.h b/arch/um/include/asm/required-features.h new file mode 100644 index 0000000..dfb967b --- /dev/null +++ b/arch/um/include/asm/required-features.h @@ -0,0 +1,9 @@ +#ifndef __UM_REQUIRED_FEATURES_H +#define __UM_REQUIRED_FEATURES_H + +/* + * Nothing to see, just need something for the i386 and x86_64 asm + * headers to include. + */ + +#endif diff --git a/arch/um/include/asm/sections.h b/arch/um/include/asm/sections.h new file mode 100644 index 0000000..6b0231e --- /dev/null +++ b/arch/um/include/asm/sections.h @@ -0,0 +1,7 @@ +#ifndef _UM_SECTIONS_H +#define _UM_SECTIONS_H + +/* nothing to see, move along */ +#include <asm-generic/sections.h> + +#endif diff --git a/arch/um/include/asm/segment.h b/arch/um/include/asm/segment.h new file mode 100644 index 0000000..45183fc --- /dev/null +++ b/arch/um/include/asm/segment.h @@ -0,0 +1,10 @@ +#ifndef __UM_SEGMENT_H +#define __UM_SEGMENT_H + +extern int host_gdt_entry_tls_min; + +#define GDT_ENTRY_TLS_ENTRIES 3 +#define GDT_ENTRY_TLS_MIN host_gdt_entry_tls_min +#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) + +#endif diff --git a/arch/um/include/asm/setup.h b/arch/um/include/asm/setup.h new file mode 100644 index 0000000..99f0863 --- /dev/null +++ b/arch/um/include/asm/setup.h @@ -0,0 +1,10 @@ +#ifndef SETUP_H_INCLUDED +#define SETUP_H_INCLUDED + +/* POSIX mandated with _POSIX_ARG_MAX that we can rely on 4096 chars in the + * command line, so this choice is ok. 
+ */ + +#define COMMAND_LINE_SIZE 4096 + +#endif /* SETUP_H_INCLUDED */ diff --git a/arch/um/include/asm/smp.h b/arch/um/include/asm/smp.h new file mode 100644 index 0000000..f27a963 --- /dev/null +++ b/arch/um/include/asm/smp.h @@ -0,0 +1,33 @@ +#ifndef __UM_SMP_H +#define __UM_SMP_H + +#ifdef CONFIG_SMP + +#include "linux/bitops.h" +#include "asm/current.h" +#include "linux/cpumask.h" + +#define raw_smp_processor_id() (current_thread->cpu) + +#define cpu_logical_map(n) (n) +#define cpu_number_map(n) (n) +#define PROC_CHANGE_PENALTY 15 /* Pick a number, any number */ +extern int hard_smp_processor_id(void); +#define NO_PROC_ID -1 + +extern int ncpus; + + +static inline void smp_cpus_done(unsigned int maxcpus) +{ +} + +extern struct task_struct *idle_threads[NR_CPUS]; + +#else + +#define hard_smp_processor_id() 0 + +#endif + +#endif diff --git a/arch/um/include/asm/suspend.h b/arch/um/include/asm/suspend.h new file mode 100644 index 0000000..f4e8e00 --- /dev/null +++ b/arch/um/include/asm/suspend.h @@ -0,0 +1,4 @@ +#ifndef __UM_SUSPEND_H +#define __UM_SUSPEND_H + +#endif diff --git a/arch/um/include/asm/system.h b/arch/um/include/asm/system.h new file mode 100644 index 0000000..753346e --- /dev/null +++ b/arch/um/include/asm/system.h @@ -0,0 +1,35 @@ +#ifndef __UM_SYSTEM_GENERIC_H +#define __UM_SYSTEM_GENERIC_H + +#include "sysdep/system.h" + +extern void *switch_to(void *prev, void *next, void *last); + +extern int get_signals(void); +extern int set_signals(int enable); +extern int get_signals(void); +extern void block_signals(void); +extern void unblock_signals(void); + +#define local_save_flags(flags) do { typecheck(unsigned long, flags); \ + (flags) = get_signals(); } while(0) +#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \ + set_signals(flags); } while(0) + +#define local_irq_save(flags) do { local_save_flags(flags); \ + local_irq_disable(); } while(0) + +#define local_irq_enable() unblock_signals() +#define local_irq_disable() 
block_signals() + +#define irqs_disabled() \ +({ \ + unsigned long flags; \ + local_save_flags(flags); \ + (flags == 0); \ +}) + +extern void *_switch_to(void *prev, void *next, void *last); +#define switch_to(prev, next, last) prev = _switch_to(prev, next, last) + +#endif diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h new file mode 100644 index 0000000..62274ab --- /dev/null +++ b/arch/um/include/asm/thread_info.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __UM_THREAD_INFO_H +#define __UM_THREAD_INFO_H + +#ifndef __ASSEMBLY__ + +#include <asm/types.h> +#include <asm/page.h> +#include <asm/uaccess.h> + +struct thread_info { + struct task_struct *task; /* main task structure */ + struct exec_domain *exec_domain; /* execution domain */ + unsigned long flags; /* low level flags */ + __u32 cpu; /* current CPU */ + int preempt_count; /* 0 => preemptable, + <0 => BUG */ + mm_segment_t addr_limit; /* thread address space: + 0-0xBFFFFFFF for user + 0-0xFFFFFFFF for kernel */ + struct restart_block restart_block; + struct thread_info *real_thread; /* Points to non-IRQ stack */ +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = 1, \ + .addr_limit = KERNEL_DS, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ + .real_thread = NULL, \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + +#define THREAD_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE) +/* how to get the thread information struct from C */ +static inline struct thread_info *current_thread_info(void) +{ + struct thread_info *ti; + unsigned long mask = THREAD_SIZE - 1; + ti = (struct thread_info *) (((unsigned long) &ti) & ~mask); + return ti; +} + +#define THREAD_SIZE_ORDER CONFIG_KERNEL_STACK_ORDER + +#endif + 
+#define PREEMPT_ACTIVE 0x10000000 + +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_SIGPENDING 1 /* signal pending */ +#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ +#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling + * TIF_NEED_RESCHED + */ +#define TIF_RESTART_BLOCK 4 +#define TIF_MEMDIE 5 +#define TIF_SYSCALL_AUDIT 6 +#define TIF_RESTORE_SIGMASK 7 +#define TIF_FREEZE 16 /* is freezing for suspend */ + +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) +#define _TIF_MEMDIE (1 << TIF_MEMDIE) +#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) +#define _TIF_FREEZE (1 << TIF_FREEZE) + +#endif diff --git a/arch/um/include/asm/timex.h b/arch/um/include/asm/timex.h new file mode 100644 index 0000000..0f4ada0 --- /dev/null +++ b/arch/um/include/asm/timex.h @@ -0,0 +1,13 @@ +#ifndef __UM_TIMEX_H +#define __UM_TIMEX_H + +typedef unsigned long cycles_t; + +static inline cycles_t get_cycles (void) +{ + return 0; +} + +#define CLOCK_TICK_RATE (HZ) + +#endif diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h new file mode 100644 index 0000000..5240fa1 --- /dev/null +++ b/arch/um/include/asm/tlb.h @@ -0,0 +1,127 @@ +#ifndef __UM_TLB_H +#define __UM_TLB_H + +#include <linux/pagemap.h> +#include <linux/swap.h> +#include <asm/percpu.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> + +#define tlb_start_vma(tlb, vma) do { } while (0) +#define tlb_end_vma(tlb, vma) do { } while (0) +#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) + +/* struct mmu_gather is an opaque type used by the mm code for passing around + * any data needed by arch specific code for tlb_remove_page. + */ +struct mmu_gather { + struct mm_struct *mm; + unsigned int need_flush; /* Really unmapped some ptes? 
*/ + unsigned long start; + unsigned long end; + unsigned int fullmm; /* non-zero means full mm flush */ +}; + +/* Users of the generic TLB shootdown code must declare this storage space. */ +DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); + +static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, + unsigned long address) +{ + if (tlb->start > address) + tlb->start = address; + if (tlb->end < address + PAGE_SIZE) + tlb->end = address + PAGE_SIZE; +} + +static inline void init_tlb_gather(struct mmu_gather *tlb) +{ + tlb->need_flush = 0; + + tlb->start = TASK_SIZE; + tlb->end = 0; + + if (tlb->fullmm) { + tlb->start = 0; + tlb->end = TASK_SIZE; + } +} + +/* tlb_gather_mmu + * Return a pointer to an initialized struct mmu_gather. + */ +static inline struct mmu_gather * +tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) +{ + struct mmu_gather *tlb = &get_cpu_var(mmu_gathers); + + tlb->mm = mm; + tlb->fullmm = full_mm_flush; + + init_tlb_gather(tlb); + + return tlb; +} + +extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + unsigned long end); + +static inline void +tlb_flush_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +{ + if (!tlb->need_flush) + return; + + flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end); + init_tlb_gather(tlb); +} + +/* tlb_finish_mmu + * Called at the end of the shootdown operation to free up any resources + * that were required. + */ +static inline void +tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +{ + tlb_flush_mmu(tlb, start, end); + + /* keep the page table cache within bounds */ + check_pgt_cache(); + + put_cpu_var(mmu_gathers); +} + +/* tlb_remove_page + * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), + * while handling the additional races in SMP caused by other CPUs + * caching valid mappings in their TLBs. 
+ */ +static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + tlb->need_flush = 1; + free_page_and_swap_cache(page); + return; +} + +/** + * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation. + * + * Record the fact that pte's were really umapped in ->need_flush, so we can + * later optimise away the tlb invalidate. This helps when userspace is + * unmapping already-unmapped pages, which happens quite a lot. + */ +#define tlb_remove_tlb_entry(tlb, ptep, address) \ + do { \ + tlb->need_flush = 1; \ + __tlb_remove_tlb_entry(tlb, ptep, address); \ + } while (0) + +#define pte_free_tlb(tlb, ptep) __pte_free_tlb(tlb, ptep) + +#define pud_free_tlb(tlb, pudp) __pud_free_tlb(tlb, pudp) + +#define pmd_free_tlb(tlb, pmdp) __pmd_free_tlb(tlb, pmdp) + +#define tlb_migrate_finish(mm) do {} while (0) + +#endif diff --git a/arch/um/include/asm/tlbflush.h b/arch/um/include/asm/tlbflush.h new file mode 100644 index 0000000..614f2c0 --- /dev/null +++ b/arch/um/include/asm/tlbflush.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __UM_TLBFLUSH_H +#define __UM_TLBFLUSH_H + +#include <linux/mm.h> + +/* + * TLB flushing: + * + * - flush_tlb() flushes the current mm struct TLBs + * - flush_tlb_all() flushes all processes TLBs + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_kernel_vm() flushes the kernel vm area + * - flush_tlb_range(vma, start, end) flushes a range of pages + */ + +extern void flush_tlb_all(void); +extern void flush_tlb_mm(struct mm_struct *mm); +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long address); +extern void flush_tlb_kernel_vm(void); +extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); +extern void 
__flush_tlb_one(unsigned long addr); + +#endif diff --git a/arch/um/include/asm/topology.h b/arch/um/include/asm/topology.h new file mode 100644 index 0000000..0905e4f --- /dev/null +++ b/arch/um/include/asm/topology.h @@ -0,0 +1,6 @@ +#ifndef _ASM_UM_TOPOLOGY_H +#define _ASM_UM_TOPOLOGY_H + +#include <asm-generic/topology.h> + +#endif diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h new file mode 100644 index 0000000..b9a895d --- /dev/null +++ b/arch/um/include/asm/uaccess.h @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __UM_UACCESS_H +#define __UM_UACCESS_H + +#include <asm/errno.h> +#include <asm/processor.h> + +/* thread_info has a mm_segment_t in it, so put the definition up here */ +typedef struct { + unsigned long seg; +} mm_segment_t; + +#include "linux/thread_info.h" + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +/* + * The fs value determines whether argument validity checking should be + * performed or not. If get_fs() == USER_DS, checking is performed, with + * get_fs() == KERNEL_DS, checking is bypassed. + * + * For historical reasons, these macros are grossly misnamed. 
+ */ + +#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) + +#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) +#define USER_DS MAKE_MM_SEG(TASK_SIZE) + +#define get_ds() (KERNEL_DS) +#define get_fs() (current_thread_info()->addr_limit) +#define set_fs(x) (current_thread_info()->addr_limit = (x)) + +#define segment_eq(a, b) ((a).seg == (b).seg) + +#include "um_uaccess.h" + +#define __copy_from_user(to, from, n) copy_from_user(to, from, n) + +#define __copy_to_user(to, from, n) copy_to_user(to, from, n) + +#define __copy_to_user_inatomic __copy_to_user +#define __copy_from_user_inatomic __copy_from_user + +#define __get_user(x, ptr) \ +({ \ + const __typeof__(*(ptr)) __user *__private_ptr = (ptr); \ + __typeof__(x) __private_val; \ + int __private_ret = -EFAULT; \ + (x) = (__typeof__(*(__private_ptr)))0; \ + if (__copy_from_user((__force void *)&__private_val, (__private_ptr),\ + sizeof(*(__private_ptr))) == 0) { \ + (x) = (__typeof__(*(__private_ptr))) __private_val; \ + __private_ret = 0; \ + } \ + __private_ret; \ +}) + +#define get_user(x, ptr) \ +({ \ + const __typeof__((*(ptr))) __user *private_ptr = (ptr); \ + (access_ok(VERIFY_READ, private_ptr, sizeof(*private_ptr)) ? \ + __get_user(x, private_ptr) : ((x) = (__typeof__(*ptr))0, -EFAULT)); \ +}) + +#define __put_user(x, ptr) \ +({ \ + __typeof__(*(ptr)) __user *__private_ptr = ptr; \ + __typeof__(*(__private_ptr)) __private_val; \ + int __private_ret = -EFAULT; \ + __private_val = (__typeof__(*(__private_ptr))) (x); \ + if (__copy_to_user((__private_ptr), &__private_val, \ + sizeof(*(__private_ptr))) == 0) { \ + __private_ret = 0; \ + } \ + __private_ret; \ +}) + +#define put_user(x, ptr) \ +({ \ + __typeof__(*(ptr)) __user *private_ptr = (ptr); \ + (access_ok(VERIFY_WRITE, private_ptr, sizeof(*private_ptr)) ? 
\ + __put_user(x, private_ptr) : -EFAULT); \ +}) + +#define strlen_user(str) strnlen_user(str, ~0U >> 1) + +struct exception_table_entry +{ + unsigned long insn; + unsigned long fixup; +}; + +#endif diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h new file mode 100644 index 0000000..a19db3e --- /dev/null +++ b/arch/um/include/asm/xor.h @@ -0,0 +1,6 @@ +#ifndef __UM_XOR_H +#define __UM_XOR_H + +#include "asm-generic/xor.h" + +#endif diff --git a/arch/um/include/shared/aio.h b/arch/um/include/shared/aio.h new file mode 100644 index 0000000..423bae9 --- /dev/null +++ b/arch/um/include/shared/aio.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef AIO_H__ +#define AIO_H__ + +enum aio_type { AIO_READ, AIO_WRITE, AIO_MMAP }; + +struct aio_thread_reply { + void *data; + int err; +}; + +struct aio_context { + int reply_fd; + struct aio_context *next; +}; + +#define INIT_AIO_CONTEXT { .reply_fd = -1, \ + .next = NULL } + +extern int submit_aio(enum aio_type type, int fd, char *buf, int len, + unsigned long long offset, int reply_fd, + struct aio_context *aio); + +#endif diff --git a/arch/um/include/shared/arch.h b/arch/um/include/shared/arch.h new file mode 100644 index 0000000..2de92a0 --- /dev/null +++ b/arch/um/include/shared/arch.h @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __ARCH_H__ +#define __ARCH_H__ + +#include "sysdep/ptrace.h" + +extern void arch_check_bugs(void); +extern int arch_fixup(unsigned long address, struct uml_pt_regs *regs); +extern void arch_examine_signal(int sig, struct uml_pt_regs *regs); + +#endif diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h new file mode 100644 index 0000000..a92b678 --- /dev/null +++ b/arch/um/include/shared/as-layout.h @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed 
under the GPL + */ + +#ifndef __START_H__ +#define __START_H__ + +#include "kern_constants.h" + +/* + * Stolen from linux/const.h, which can't be directly included since + * this is used in userspace code, which has no access to the kernel + * headers. Changed to be suitable for adding casts to the start, + * rather than "UL" to the end. + */ + +/* Some constant macros are used in both assembler and + * C code. Therefore we cannot annotate them always with + * 'UL' and other type specifiers unilaterally. We + * use the following macros to deal with this. + */ + +#ifdef __ASSEMBLY__ +#define _UML_AC(X, Y) (Y) +#else +#define __UML_AC(X, Y) (X(Y)) +#define _UML_AC(X, Y) __UML_AC(X, Y) +#endif + +#define STUB_START _UML_AC(, 0x100000) +#define STUB_CODE _UML_AC((unsigned long), STUB_START) +#define STUB_DATA _UML_AC((unsigned long), STUB_CODE + UM_KERN_PAGE_SIZE) +#define STUB_END _UML_AC((unsigned long), STUB_DATA + UM_KERN_PAGE_SIZE) + +#ifndef __ASSEMBLY__ + +#include "sysdep/ptrace.h" + +struct cpu_task { + int pid; + void *task; +}; + +extern struct cpu_task cpu_tasks[]; + +extern unsigned long low_physmem; +extern unsigned long high_physmem; +extern unsigned long uml_physmem; +extern unsigned long uml_reserved; +extern unsigned long end_vm; +extern unsigned long start_vm; +extern unsigned long long highmem; + +extern unsigned long _stext, _etext, _sdata, _edata, __bss_start, _end; +extern unsigned long _unprotected_end; +extern unsigned long brk_start; + +extern unsigned long host_task_size; + +extern int linux_main(int argc, char **argv); + +extern void (*sig_info[])(int, struct uml_pt_regs *); + +#endif + +#endif diff --git a/arch/um/include/shared/chan_kern.h b/arch/um/include/shared/chan_kern.h new file mode 100644 index 0000000..1e65145 --- /dev/null +++ b/arch/um/include/shared/chan_kern.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __CHAN_KERN_H__ +#define __CHAN_KERN_H__ + 
+#include "linux/tty.h" +#include "linux/list.h" +#include "linux/console.h" +#include "chan_user.h" +#include "line.h" + +struct chan { + struct list_head list; + struct list_head free_list; + struct line *line; + char *dev; + unsigned int primary:1; + unsigned int input:1; + unsigned int output:1; + unsigned int opened:1; + unsigned int enabled:1; + int fd; + const struct chan_ops *ops; + void *data; +}; + +extern void chan_interrupt(struct list_head *chans, struct delayed_work *task, + struct tty_struct *tty, int irq); +extern int parse_chan_pair(char *str, struct line *line, int device, + const struct chan_opts *opts, char **error_out); +extern int write_chan(struct list_head *chans, const char *buf, int len, + int write_irq); +extern int console_write_chan(struct list_head *chans, const char *buf, + int len); +extern int console_open_chan(struct line *line, struct console *co); +extern void deactivate_chan(struct list_head *chans, int irq); +extern void reactivate_chan(struct list_head *chans, int irq); +extern void chan_enable_winch(struct list_head *chans, struct tty_struct *tty); +extern int enable_chan(struct line *line); +extern void close_chan(struct list_head *chans, int delay_free_irq); +extern int chan_window_size(struct list_head *chans, + unsigned short *rows_out, + unsigned short *cols_out); +extern int chan_config_string(struct list_head *chans, char *str, int size, + char **error_out); + +#endif diff --git a/arch/um/include/shared/chan_user.h b/arch/um/include/shared/chan_user.h new file mode 100644 index 0000000..9b9ced8 --- /dev/null +++ b/arch/um/include/shared/chan_user.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __CHAN_USER_H__ +#define __CHAN_USER_H__ + +#include "init.h" + +struct chan_opts { + void (*const announce)(char *dev_name, int dev); + char *xterm_title; + const int raw; +}; + +enum chan_init_pri { INIT_STATIC, INIT_ALL, INIT_ONE }; + +struct chan_ops 
{ + char *type; + void *(*init)(char *, int, const struct chan_opts *); + int (*open)(int, int, int, void *, char **); + void (*close)(int, void *); + int (*read)(int, char *, void *); + int (*write)(int, const char *, int, void *); + int (*console_write)(int, const char *, int); + int (*window_size)(int, void *, unsigned short *, unsigned short *); + void (*free)(void *); + int winch; +}; + +extern const struct chan_ops fd_ops, null_ops, port_ops, pts_ops, pty_ops, + tty_ops, xterm_ops; + +extern void generic_close(int fd, void *unused); +extern int generic_read(int fd, char *c_out, void *unused); +extern int generic_write(int fd, const char *buf, int n, void *unused); +extern int generic_console_write(int fd, const char *buf, int n); +extern int generic_window_size(int fd, void *unused, unsigned short *rows_out, + unsigned short *cols_out); +extern void generic_free(void *data); + +struct tty_struct; +extern void register_winch(int fd, struct tty_struct *tty); +extern void register_winch_irq(int fd, int tty_fd, int pid, + struct tty_struct *tty, unsigned long stack); + +#define __channel_help(fn, prefix) \ +__uml_help(fn, prefix "[0-9]*=<channel description>\n" \ +" Attach a console or serial line to a host channel. 
See\n" \ +" http://user-mode-linux.sourceforge.net/old/input.html for a complete\n" \ +" description of this switch.\n\n" \ +); + +#endif diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h new file mode 100644 index 0000000..72009c7 --- /dev/null +++ b/arch/um/include/shared/common-offsets.h @@ -0,0 +1,54 @@ +/* for use by sys-$SUBARCH/kernel-offsets.c */ + +DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); + +OFFSET(HOST_TASK_REGS, task_struct, thread.regs); +OFFSET(HOST_TASK_PID, task_struct, pid); + +DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE); +DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK); +DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT); +DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); + +DEFINE_STR(UM_KERN_EMERG, KERN_EMERG); +DEFINE_STR(UM_KERN_ALERT, KERN_ALERT); +DEFINE_STR(UM_KERN_CRIT, KERN_CRIT); +DEFINE_STR(UM_KERN_ERR, KERN_ERR); +DEFINE_STR(UM_KERN_WARNING, KERN_WARNING); +DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE); +DEFINE_STR(UM_KERN_INFO, KERN_INFO); +DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG); +DEFINE_STR(UM_KERN_CONT, KERN_CONT); + +DEFINE(UM_ELF_CLASS, ELF_CLASS); +DEFINE(UM_ELFCLASS32, ELFCLASS32); +DEFINE(UM_ELFCLASS64, ELFCLASS64); + +DEFINE(UM_NR_CPUS, NR_CPUS); + +DEFINE(UM_GFP_KERNEL, GFP_KERNEL); +DEFINE(UM_GFP_ATOMIC, GFP_ATOMIC); + +/* For crypto assembler code. 
*/ +DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx)); + +DEFINE(UM_THREAD_SIZE, THREAD_SIZE); + +DEFINE(UM_HZ, HZ); + +DEFINE(UM_USEC_PER_SEC, USEC_PER_SEC); +DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); +DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); + +#ifdef CONFIG_PRINTK +DEFINE(UML_CONFIG_PRINTK, CONFIG_PRINTK); +#endif +#ifdef CONFIG_NO_HZ +DEFINE(UML_CONFIG_NO_HZ, CONFIG_NO_HZ); +#endif +#ifdef CONFIG_UML_X86 +DEFINE(UML_CONFIG_UML_X86, CONFIG_UML_X86); +#endif +#ifdef CONFIG_64BIT +DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT); +#endif diff --git a/arch/um/include/shared/elf_user.h b/arch/um/include/shared/elf_user.h new file mode 100644 index 0000000..53516b6 --- /dev/null +++ b/arch/um/include/shared/elf_user.h @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2004 Fujitsu Siemens Computers GmbH + * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com> + * Licensed under the GPL + */ + +#ifndef __ELF_USER_H__ +#define __ELF_USER_H__ + +/* For compilation on a host that doesn't support AT_SYSINFO (Linux 2.4) */ + +#ifndef AT_SYSINFO +#define AT_SYSINFO 32 +#endif +#ifndef AT_SYSINFO_EHDR +#define AT_SYSINFO_EHDR 33 +#endif + +#endif diff --git a/arch/um/include/shared/frame_kern.h b/arch/um/include/shared/frame_kern.h new file mode 100644 index 0000000..ce9514f --- /dev/null +++ b/arch/um/include/shared/frame_kern.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __FRAME_KERN_H_ +#define __FRAME_KERN_H_ + +#define _S(nr) (1<<((nr)-1)) +#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP))) + +extern int setup_signal_stack_sc(unsigned long stack_top, int sig, + struct k_sigaction *ka, + struct pt_regs *regs, + sigset_t *mask); +extern int setup_signal_stack_si(unsigned long stack_top, int sig, + struct k_sigaction *ka, + struct pt_regs *regs, siginfo_t *info, + sigset_t *mask); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+ * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/include/shared/init.h b/arch/um/include/shared/init.h new file mode 100644 index 0000000..37dd097 --- /dev/null +++ b/arch/um/include/shared/init.h @@ -0,0 +1,151 @@ +#ifndef _LINUX_UML_INIT_H +#define _LINUX_UML_INIT_H + +/* These macros are used to mark some functions or + * initialized data (doesn't apply to uninitialized data) + * as `initialization' functions. The kernel can take this + * as hint that the function is used only during the initialization + * phase and free up used memory resources after + * + * Usage: + * For functions: + * + * You should add __init immediately before the function name, like: + * + * static void __init initme(int x, int y) + * { + * extern int z; z = x * y; + * } + * + * If the function has a prototype somewhere, you can also add + * __init between closing brace of the prototype and semicolon: + * + * extern int initialize_foobar_device(int, int, int) __init; + * + * For initialized data: + * You should insert __initdata between the variable name and equal + * sign followed by value, e.g.: + * + * static int init_variable __initdata = 0; + * static char linux_logo[] __initdata = { 0x32, 0x36, ... }; + * + * Don't forget to initialize data not at file scope, i.e. within a function, + * as gcc otherwise puts the data into the bss section and not into the init + * section. + * + * Also note, that this data cannot be "const". 
+ */ + +#ifndef _LINUX_INIT_H +typedef int (*initcall_t)(void); +typedef void (*exitcall_t)(void); + +#ifndef __KERNEL__ +#ifndef __section +# define __section(S) __attribute__ ((__section__(#S))) +#endif + +#if __GNUC__ == 3 + +#if __GNUC_MINOR__ >= 3 +# define __used __attribute__((__used__)) +#else +# define __used __attribute__((__unused__)) +#endif + +#else +#if __GNUC__ == 4 +# define __used __attribute__((__used__)) +#endif +#endif + +#else +#include <linux/compiler.h> +#endif +/* These are for everybody (although not all archs will actually + discard it in modules) */ +#define __init __section(.init.text) +#define __initdata __section(.init.data) +#define __exitdata __section(.exit.data) +#define __exit_call __used __section(.exitcall.exit) + +#ifdef MODULE +#define __exit __section(.exit.text) +#else +#define __exit __used __section(.exit.text) +#endif + +#endif + +#ifndef MODULE +struct uml_param { + const char *str; + int (*setup_func)(char *, int *); +}; + +extern initcall_t __uml_initcall_start, __uml_initcall_end; +extern initcall_t __uml_postsetup_start, __uml_postsetup_end; +extern const char *__uml_help_start, *__uml_help_end; +#endif + +#define __uml_initcall(fn) \ + static initcall_t __uml_initcall_##fn __uml_init_call = fn + +#define __uml_exitcall(fn) \ + static exitcall_t __uml_exitcall_##fn __uml_exit_call = fn + +extern struct uml_param __uml_setup_start, __uml_setup_end; + +#define __uml_postsetup(fn) \ + static initcall_t __uml_postsetup_##fn __uml_postsetup_call = fn + +#define __non_empty_string(dummyname,string) \ + struct __uml_non_empty_string_struct_##dummyname \ + { \ + char _string[sizeof(string)-2]; \ + } + +#ifndef MODULE +#define __uml_setup(str, fn, help...) \ + __non_empty_string(fn ##_setup, str); \ + __uml_help(fn, help); \ + static char __uml_setup_str_##fn[] __initdata = str; \ + static struct uml_param __uml_setup_##fn __uml_init_setup = { __uml_setup_str_##fn, fn } +#else +#define __uml_setup(str, fn, help...) 
\ + +#endif + +#define __uml_help(fn, help...) \ + __non_empty_string(fn ##__help, help); \ + static char __uml_help_str_##fn[] __initdata = help; \ + static const char *__uml_help_##fn __uml_setup_help = __uml_help_str_##fn + +/* + * Mark functions and data as being only used at initialization + * or exit time. + */ +#define __uml_init_setup __used __section(.uml.setup.init) +#define __uml_setup_help __used __section(.uml.help.init) +#define __uml_init_call __used __section(.uml.initcall.init) +#define __uml_postsetup_call __used __section(.uml.postsetup.init) +#define __uml_exit_call __used __section(.uml.exitcall.exit) + +#ifndef __KERNEL__ + +#define __define_initcall(level,fn) \ + static initcall_t __initcall_##fn __used \ + __attribute__((__section__(".initcall" level ".init"))) = fn + +/* Userspace initcalls shouldn't depend on anything in the kernel, so we'll + * make them run first. + */ +#define __initcall(fn) __define_initcall("1", fn) + +#define __exitcall(fn) static exitcall_t __exitcall_##fn __exit_call = fn + +#define __init_call __used __section(.initcall.init) + +#endif + +#endif /* _LINUX_UML_INIT_H */ diff --git a/arch/um/include/shared/initrd.h b/arch/um/include/shared/initrd.h new file mode 100644 index 0000000..439b9a8 --- /dev/null +++ b/arch/um/include/shared/initrd.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __INITRD_USER_H__ +#define __INITRD_USER_H__ + +extern int load_initrd(char *filename, void *buf, int size); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/include/shared/irq_kern.h b/arch/um/include/shared/irq_kern.h new file mode 100644 index 0000000..fba3895 --- /dev/null +++ b/arch/um/include/shared/irq_kern.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __IRQ_KERN_H__ +#define __IRQ_KERN_H__ + +#include "linux/interrupt.h" +#include "asm/ptrace.h" + +extern int um_request_irq(unsigned int irq, int fd, int type, + irq_handler_t handler, + unsigned long irqflags, const char * devname, + void *dev_id); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h new file mode 100644 index 0000000..c6c784d --- /dev/null +++ b/arch/um/include/shared/irq_user.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __IRQ_USER_H__ +#define __IRQ_USER_H__ + +#include "sysdep/ptrace.h" + +struct irq_fd { + struct irq_fd *next; + void *id; + int fd; + int type; + int irq; + int events; + int current_events; +}; + +enum { IRQ_READ, IRQ_WRITE }; + +extern void sigio_handler(int sig, struct uml_pt_regs *regs); +extern void free_irq_by_fd(int fd); +extern void reactivate_fd(int fd, int irqnum); +extern void deactivate_fd(int fd, int irqnum); +extern int deactivate_all_fds(void); +extern int activate_ipi(int fd, int pid); + +#endif diff --git a/arch/um/include/shared/kern.h b/arch/um/include/shared/kern.h new 
file mode 100644 index 0000000..4ce3fc6 --- /dev/null +++ b/arch/um/include/shared/kern.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __KERN_H__ +#define __KERN_H__ + +/* These are all user-mode things which are convenient to call directly + * from kernel code and for which writing a wrapper is too much of a pain. + * The regular include files can't be included because this file is included + * only into kernel code, and user-space includes conflict with kernel + * includes. + */ + +extern int errno; + +extern int clone(int (*proc)(void *), void *sp, int flags, void *data); +extern int sleep(int); +extern int printf(const char *fmt, ...); +extern char *strerror(int errnum); +extern char *ptsname(int __fd); +extern int munmap(void *, int); +extern void *sbrk(int increment); +extern void *malloc(int size); +extern void perror(char *err); +extern int kill(int pid, int sig); +extern int getuid(void); +extern int getgid(void); +extern int pause(void); +extern int write(int, const void *, int); +extern void exit(int); +extern int close(int); +extern int read(unsigned int, char *, int); +extern int pipe(int *); +extern int sched_yield(void); +extern int ptrace(int op, int pid, long addr, long data); + +#endif + diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h new file mode 100644 index 0000000..3c34122 --- /dev/null +++ b/arch/um/include/shared/kern_util.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __KERN_UTIL_H__ +#define __KERN_UTIL_H__ + +#include "sysdep/ptrace.h" +#include "sysdep/faultinfo.h" + +extern int uml_exitcode; + +extern int ncpus; +extern int kmalloc_ok; + +#define UML_ROUND_UP(addr) \ + ((((unsigned long) addr) + PAGE_SIZE - 1) & PAGE_MASK) + +extern unsigned long alloc_stack(int order, int atomic); +extern void free_stack(unsigned long stack, int order); 
+ +extern int do_signal(void); +extern void copy_sc(struct uml_pt_regs *regs, void *from); +extern void interrupt_end(void); +extern void relay_signal(int sig, struct uml_pt_regs *regs); + +extern unsigned long segv(struct faultinfo fi, unsigned long ip, + int is_user, struct uml_pt_regs *regs); +extern int handle_page_fault(unsigned long address, unsigned long ip, + int is_write, int is_user, int *code_out); + +extern unsigned int do_IRQ(int irq, struct uml_pt_regs *regs); +extern int smp_sigio_handler(void); +extern void initial_thread_cb(void (*proc)(void *), void *arg); +extern int is_syscall(unsigned long addr); +extern void timer_handler(int sig, struct uml_pt_regs *regs); + +extern void timer_handler(int sig, struct uml_pt_regs *regs); + +extern int start_uml(void); +extern void paging_init(void); + +extern void uml_cleanup(void); +extern void do_uml_exitcalls(void); + +/* + * Are we disallowed to sleep? Used to choose between GFP_KERNEL and + * GFP_ATOMIC. + */ +extern int __cant_sleep(void); +extern void *get_current(void); +extern int copy_from_user_proc(void *to, void *from, int size); +extern int cpu(void); +extern char *uml_strdup(const char *string); + +extern unsigned long to_irq_stack(unsigned long *mask_out); +extern unsigned long from_irq_stack(int nested); + +extern void syscall_trace(struct uml_pt_regs *regs, int entryexit); +extern int singlestepping(void *t); + +extern void segv_handler(int sig, struct uml_pt_regs *regs); +extern void bus_handler(int sig, struct uml_pt_regs *regs); +extern void winch(int sig, struct uml_pt_regs *regs); +extern void fatal_sigsegv(void) __attribute__ ((noreturn)); + + +#endif diff --git a/arch/um/include/shared/ldt.h b/arch/um/include/shared/ldt.h new file mode 100644 index 0000000..a7f999a --- /dev/null +++ b/arch/um/include/shared/ldt.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2004 Fujitsu Siemens Computers GmbH + * Licensed under the GPL + * + * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com> + */ + 
+#ifndef __ASM_LDT_H +#define __ASM_LDT_H + +#include <linux/mutex.h> +#include <sysdep/host_ldt.h> + +extern void ldt_host_info(void); + +#define LDT_PAGES_MAX \ + ((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE) +#define LDT_ENTRIES_PER_PAGE \ + (PAGE_SIZE/LDT_ENTRY_SIZE) +#define LDT_DIRECT_ENTRIES \ + ((LDT_PAGES_MAX*sizeof(void *))/LDT_ENTRY_SIZE) + +struct ldt_entry { + __u32 a; + __u32 b; +}; + +typedef struct uml_ldt { + int entry_count; + struct mutex lock; + union { + struct ldt_entry * pages[LDT_PAGES_MAX]; + struct ldt_entry entries[LDT_DIRECT_ENTRIES]; + } u; +} uml_ldt_t; + +#endif diff --git a/arch/um/include/shared/line.h b/arch/um/include/shared/line.h new file mode 100644 index 0000000..311a0d3 --- /dev/null +++ b/arch/um/include/shared/line.h @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __LINE_H__ +#define __LINE_H__ + +#include "linux/list.h" +#include "linux/workqueue.h" +#include "linux/tty.h" +#include "linux/interrupt.h" +#include "linux/spinlock.h" +#include "linux/mutex.h" +#include "chan_user.h" +#include "mconsole_kern.h" + +/* There's only one modifiable field in this - .mc.list */ +struct line_driver { + const char *name; + const char *device_name; + const short major; + const short minor_start; + const short type; + const short subtype; + const int read_irq; + const char *read_irq_name; + const int write_irq; + const char *write_irq_name; + struct mc_device mc; +}; + +struct line { + struct tty_struct *tty; + spinlock_t count_lock; + int valid; + + char *init_str; + int init_pri; + struct list_head chan_list; + + /*This lock is actually, mostly, local to*/ + spinlock_t lock; + int throttled; + /* Yes, this is a real circular buffer. + * XXX: And this should become a struct kfifo! 
+ * + * buffer points to a buffer allocated on demand, of length + * LINE_BUFSIZE, head to the start of the ring, tail to the end.*/ + char *buffer; + char *head; + char *tail; + + int sigio; + struct delayed_work task; + const struct line_driver *driver; + int have_irq; +}; + +#define LINE_INIT(str, d) \ + { .count_lock = __SPIN_LOCK_UNLOCKED((str).count_lock), \ + .init_str = str, \ + .init_pri = INIT_STATIC, \ + .valid = 1, \ + .lock = __SPIN_LOCK_UNLOCKED((str).lock), \ + .driver = d } + +extern void line_close(struct tty_struct *tty, struct file * filp); +extern int line_open(struct line *lines, struct tty_struct *tty); +extern int line_setup(struct line *lines, unsigned int sizeof_lines, + char *init, char **error_out); +extern int line_write(struct tty_struct *tty, const unsigned char *buf, + int len); +extern int line_put_char(struct tty_struct *tty, unsigned char ch); +extern void line_set_termios(struct tty_struct *tty, struct ktermios * old); +extern int line_chars_in_buffer(struct tty_struct *tty); +extern void line_flush_buffer(struct tty_struct *tty); +extern void line_flush_chars(struct tty_struct *tty); +extern int line_write_room(struct tty_struct *tty); +extern int line_ioctl(struct tty_struct *tty, struct file * file, + unsigned int cmd, unsigned long arg); +extern void line_throttle(struct tty_struct *tty); +extern void line_unthrottle(struct tty_struct *tty); + +extern char *add_xterm_umid(char *base); +extern int line_setup_irq(int fd, int input, int output, struct line *line, + void *data); +extern void line_close_chan(struct line *line); +extern struct tty_driver *register_lines(struct line_driver *line_driver, + const struct tty_operations *driver, + struct line *lines, int nlines); +extern void lines_init(struct line *lines, int nlines, struct chan_opts *opts); +extern void close_lines(struct line *lines, int nlines); + +extern int line_config(struct line *lines, unsigned int sizeof_lines, + char *str, const struct chan_opts *opts, + char 
**error_out); +extern int line_id(char **str, int *start_out, int *end_out); +extern int line_remove(struct line *lines, unsigned int sizeof_lines, int n, + char **error_out); +extern int line_get_config(char *dev, struct line *lines, + unsigned int sizeof_lines, char *str, + int size, char **error_out); + +#endif diff --git a/arch/um/include/shared/longjmp.h b/arch/um/include/shared/longjmp.h new file mode 100644 index 0000000..e860bc5 --- /dev/null +++ b/arch/um/include/shared/longjmp.h @@ -0,0 +1,23 @@ +#ifndef __UML_LONGJMP_H +#define __UML_LONGJMP_H + +#include "sysdep/archsetjmp.h" +#include "os.h" + +extern int setjmp(jmp_buf); +extern void longjmp(jmp_buf, int); + +#define UML_LONGJMP(buf, val) do { \ + longjmp(*buf, val); \ +} while(0) + +#define UML_SETJMP(buf) ({ \ + int n; \ + volatile int enable; \ + enable = get_signals(); \ + n = setjmp(*buf); \ + if(n != 0) \ + set_signals(enable); \ + n; }) + +#endif diff --git a/arch/um/include/shared/mconsole.h b/arch/um/include/shared/mconsole.h new file mode 100644 index 0000000..c139ae1 --- /dev/null +++ b/arch/um/include/shared/mconsole.h @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __MCONSOLE_H__ +#define __MCONSOLE_H__ + +#ifndef __KERNEL__ +#include <stdint.h> +#define u32 uint32_t +#endif + +#include "sysdep/ptrace.h" + +#define MCONSOLE_MAGIC (0xcafebabe) +#define MCONSOLE_MAX_DATA (512) +#define MCONSOLE_VERSION 2 + +struct mconsole_request { + u32 magic; + u32 version; + u32 len; + char data[MCONSOLE_MAX_DATA]; +}; + +struct mconsole_reply { + u32 err; + u32 more; + u32 len; + char data[MCONSOLE_MAX_DATA]; +}; + +struct mconsole_notify { + u32 magic; + u32 version; + enum { MCONSOLE_SOCKET, MCONSOLE_PANIC, MCONSOLE_HANG, + MCONSOLE_USER_NOTIFY } type; + u32 len; + char data[MCONSOLE_MAX_DATA]; +}; + +struct mc_request; + +enum mc_context { 
MCONSOLE_INTR, MCONSOLE_PROC }; + +struct mconsole_command +{ + char *command; + void (*handler)(struct mc_request *req); + enum mc_context context; +}; + +struct mc_request +{ + int len; + int as_interrupt; + + int originating_fd; + unsigned int originlen; + unsigned char origin[128]; /* sockaddr_un */ + + struct mconsole_request request; + struct mconsole_command *cmd; + struct uml_pt_regs regs; +}; + +extern char mconsole_socket_name[]; + +extern int mconsole_unlink_socket(void); +extern int mconsole_reply_len(struct mc_request *req, const char *reply, + int len, int err, int more); +extern int mconsole_reply(struct mc_request *req, const char *str, int err, + int more); + +extern void mconsole_version(struct mc_request *req); +extern void mconsole_help(struct mc_request *req); +extern void mconsole_halt(struct mc_request *req); +extern void mconsole_reboot(struct mc_request *req); +extern void mconsole_config(struct mc_request *req); +extern void mconsole_remove(struct mc_request *req); +extern void mconsole_sysrq(struct mc_request *req); +extern void mconsole_cad(struct mc_request *req); +extern void mconsole_stop(struct mc_request *req); +extern void mconsole_go(struct mc_request *req); +extern void mconsole_log(struct mc_request *req); +extern void mconsole_proc(struct mc_request *req); +extern void mconsole_stack(struct mc_request *req); + +extern int mconsole_get_request(int fd, struct mc_request *req); +extern int mconsole_notify(char *sock_name, int type, const void *data, + int len); +extern char *mconsole_notify_socket(void); +extern void lock_notify(void); +extern void unlock_notify(void); + +#endif diff --git a/arch/um/include/shared/mconsole_kern.h b/arch/um/include/shared/mconsole_kern.h new file mode 100644 index 0000000..d2fe07e --- /dev/null +++ b/arch/um/include/shared/mconsole_kern.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __MCONSOLE_KERN_H__ +#define 
__MCONSOLE_KERN_H__ + +#include "linux/list.h" +#include "mconsole.h" + +struct mconsole_entry { + struct list_head list; + struct mc_request request; +}; + +/* All these methods are called in process context. */ +struct mc_device { + struct list_head list; + char *name; + int (*config)(char *, char **); + int (*get_config)(char *, char *, int, char **); + int (*id)(char **, int *, int *); + int (*remove)(int, char **); +}; + +#define CONFIG_CHUNK(str, size, current, chunk, end) \ +do { \ + current += strlen(chunk); \ + if(current >= size) \ + str = NULL; \ + if(str != NULL){ \ + strcpy(str, chunk); \ + str += strlen(chunk); \ + } \ + if(end) \ + current++; \ +} while(0) + +#ifdef CONFIG_MCONSOLE + +extern void mconsole_register_dev(struct mc_device *new); + +#else + +static inline void mconsole_register_dev(struct mc_device *new) +{ +} + +#endif + +#endif diff --git a/arch/um/include/shared/mem.h b/arch/um/include/shared/mem.h new file mode 100644 index 0000000..5cd40e9 --- /dev/null +++ b/arch/um/include/shared/mem.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __MEM_H__ +#define __MEM_H__ + +extern int phys_mapping(unsigned long phys, unsigned long long *offset_out); + +extern unsigned long uml_physmem; +static inline unsigned long to_phys(void *virt) +{ + return(((unsigned long) virt) - uml_physmem); +} + +static inline void *to_virt(unsigned long phys) +{ + return((void *) uml_physmem + phys); +} + +#endif diff --git a/arch/um/include/shared/mem_kern.h b/arch/um/include/shared/mem_kern.h new file mode 100644 index 0000000..cb7e196 --- /dev/null +++ b/arch/um/include/shared/mem_kern.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __MEM_KERN_H__ +#define __MEM_KERN_H__ + +#include "linux/list.h" +#include "linux/types.h" + +struct remapper { + struct list_head list; + int (*proc)(int, unsigned long, 
int, __u64); +}; + +extern void register_remapper(struct remapper *info); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/include/shared/mem_user.h b/arch/um/include/shared/mem_user.h new file mode 100644 index 0000000..46384ac --- /dev/null +++ b/arch/um/include/shared/mem_user.h @@ -0,0 +1,62 @@ +/* + * arch/um/include/mem_user.h + * + * BRIEF MODULE DESCRIPTION + * user side memory interface for support IO memory inside user mode linux + * + * Copyright (C) 2001 RidgeRun, Inc. + * Author: RidgeRun, Inc. + * Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _MEM_USER_H +#define _MEM_USER_H + +struct iomem_region { + struct iomem_region *next; + char *driver; + int fd; + int size; + unsigned long phys; + unsigned long virt; +}; + +extern struct iomem_region *iomem_regions; +extern int iomem_size; + +#define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) + +extern int init_mem_user(void); +extern void setup_memory(void *entry); +extern unsigned long find_iomem(char *driver, unsigned long *len_out); +extern int init_maps(unsigned long physmem, unsigned long iomem, + unsigned long highmem); +extern unsigned long get_vm(unsigned long len); +extern void setup_physmem(unsigned long start, unsigned long usable, + unsigned long len, unsigned long long highmem); +extern void add_iomem(char *name, int fd, unsigned long size); +extern unsigned long phys_offset(unsigned long phys); +extern void map_memory(unsigned long virt, unsigned long phys, + unsigned long len, int r, int w, int x); + +#endif diff --git a/arch/um/include/shared/net_kern.h b/arch/um/include/shared/net_kern.h new file mode 100644 index 0000000..d843c79 --- /dev/null +++ b/arch/um/include/shared/net_kern.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __UM_NET_KERN_H +#define __UM_NET_KERN_H + +#include <linux/netdevice.h> +#include <linux/platform_device.h> +#include <linux/skbuff.h> +#include <linux/socket.h> +#include <linux/list.h> +#include <linux/workqueue.h> + +struct uml_net { + struct list_head list; + struct net_device *dev; + struct platform_device pdev; + int index; + unsigned char mac[ETH_ALEN]; +}; + +struct uml_net_private { + struct list_head list; + spinlock_t lock; + struct net_device *dev; + struct timer_list tl; + struct 
net_device_stats stats; + struct work_struct work; + int fd; + unsigned char mac[ETH_ALEN]; + int max_packet; + unsigned short (*protocol)(struct sk_buff *); + int (*open)(void *); + void (*close)(int, void *); + void (*remove)(void *); + int (*read)(int, struct sk_buff *skb, struct uml_net_private *); + int (*write)(int, struct sk_buff *skb, struct uml_net_private *); + + void (*add_address)(unsigned char *, unsigned char *, void *); + void (*delete_address)(unsigned char *, unsigned char *, void *); + char user[0]; +}; + +struct net_kern_info { + void (*init)(struct net_device *, void *); + unsigned short (*protocol)(struct sk_buff *); + int (*read)(int, struct sk_buff *skb, struct uml_net_private *); + int (*write)(int, struct sk_buff *skb, struct uml_net_private *); +}; + +struct transport { + struct list_head list; + const char *name; + int (* const setup)(char *, char **, void *); + const struct net_user_info *user; + const struct net_kern_info *kern; + const int private_size; + const int setup_size; +}; + +extern struct net_device *ether_init(int); +extern unsigned short ether_protocol(struct sk_buff *); +extern int tap_setup_common(char *str, char *type, char **dev_name, + char **mac_out, char **gate_addr); +extern void register_transport(struct transport *new); +extern unsigned short eth_protocol(struct sk_buff *skb); + +#endif diff --git a/arch/um/include/shared/net_user.h b/arch/um/include/shared/net_user.h new file mode 100644 index 0000000..63bee15 --- /dev/null +++ b/arch/um/include/shared/net_user.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __UM_NET_USER_H__ +#define __UM_NET_USER_H__ + +#define ETH_ADDR_LEN (6) +#define ETH_HEADER_ETHERTAP (16) +#define ETH_HEADER_OTHER (14) +#define ETH_MAX_PACKET (1500) + +#define UML_NET_VERSION (4) + +struct net_user_info { + int (*init)(void *, void *); + int (*open)(void *); + void (*close)(int, void *); + void 
(*remove)(void *); + void (*add_address)(unsigned char *, unsigned char *, void *); + void (*delete_address)(unsigned char *, unsigned char *, void *); + int max_packet; + int mtu; +}; + +extern void ether_user_init(void *data, void *dev); +extern void iter_addresses(void *d, void (*cb)(unsigned char *, + unsigned char *, void *), + void *arg); + +extern void *get_output_buffer(int *len_out); +extern void free_output_buffer(void *buffer); + +extern int tap_open_common(void *dev, char *gate_addr); +extern void tap_check_ips(char *gate_addr, unsigned char *eth_addr); + +extern void read_output(int fd, char *output_out, int len); + +extern int net_read(int fd, void *buf, int len); +extern int net_recvfrom(int fd, void *buf, int len); +extern int net_write(int fd, void *buf, int len); +extern int net_send(int fd, void *buf, int len); +extern int net_sendto(int fd, void *buf, int len, void *to, int sock_len); + +extern void open_addr(unsigned char *addr, unsigned char *netmask, void *arg); +extern void close_addr(unsigned char *addr, unsigned char *netmask, void *arg); + +extern char *split_if_spec(char *str, ...); + +extern int dev_netmask(void *d, void *m); + +#endif diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h new file mode 100644 index 0000000..cd40fdd --- /dev/null +++ b/arch/um/include/shared/os.h @@ -0,0 +1,303 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __OS_H__ +#define __OS_H__ + +#include <stdarg.h> +#include "irq_user.h" +#include "longjmp.h" +#include "mm_id.h" +#include "sysdep/tls.h" + +#define CATCH_EINTR(expr) while ((errno = 0, ((expr) < 0)) && (errno == EINTR)) + +#define OS_TYPE_FILE 1 +#define OS_TYPE_DIR 2 +#define OS_TYPE_SYMLINK 3 +#define OS_TYPE_CHARDEV 4 +#define OS_TYPE_BLOCKDEV 5 +#define OS_TYPE_FIFO 6 +#define OS_TYPE_SOCK 7 + +/* os_access() flags */ +#define OS_ACC_F_OK 0 /* Test for existence. 
*/ +#define OS_ACC_X_OK 1 /* Test for execute permission. */ +#define OS_ACC_W_OK 2 /* Test for write permission. */ +#define OS_ACC_R_OK 4 /* Test for read permission. */ +#define OS_ACC_RW_OK (OS_ACC_W_OK | OS_ACC_R_OK) /* Test for RW permission */ + +/* + * types taken from stat_file() in hostfs_user.c + * (if they are wrong here, they are wrong there...). + */ +struct uml_stat { + int ust_dev; /* device */ + unsigned long long ust_ino; /* inode */ + int ust_mode; /* protection */ + int ust_nlink; /* number of hard links */ + int ust_uid; /* user ID of owner */ + int ust_gid; /* group ID of owner */ + unsigned long long ust_size; /* total size, in bytes */ + int ust_blksize; /* blocksize for filesystem I/O */ + unsigned long long ust_blocks; /* number of blocks allocated */ + unsigned long ust_atime; /* time of last access */ + unsigned long ust_mtime; /* time of last modification */ + unsigned long ust_ctime; /* time of last change */ +}; + +struct openflags { + unsigned int r : 1; + unsigned int w : 1; + unsigned int s : 1; /* O_SYNC */ + unsigned int c : 1; /* O_CREAT */ + unsigned int t : 1; /* O_TRUNC */ + unsigned int a : 1; /* O_APPEND */ + unsigned int e : 1; /* O_EXCL */ + unsigned int cl : 1; /* FD_CLOEXEC */ +}; + +#define OPENFLAGS() ((struct openflags) { .r = 0, .w = 0, .s = 0, .c = 0, \ + .t = 0, .a = 0, .e = 0, .cl = 0 }) + +static inline struct openflags of_read(struct openflags flags) +{ + flags.r = 1; + return flags; +} + +static inline struct openflags of_write(struct openflags flags) +{ + flags.w = 1; + return flags; +} + +static inline struct openflags of_rdwr(struct openflags flags) +{ + return of_read(of_write(flags)); +} + +static inline struct openflags of_set_rw(struct openflags flags, int r, int w) +{ + flags.r = r; + flags.w = w; + return flags; +} + +static inline struct openflags of_sync(struct openflags flags) +{ + flags.s = 1; + return flags; +} + +static inline struct openflags of_create(struct openflags flags) +{ + flags.c = 1; 
+ return flags; +} + +static inline struct openflags of_trunc(struct openflags flags) +{ + flags.t = 1; + return flags; +} + +static inline struct openflags of_append(struct openflags flags) +{ + flags.a = 1; + return flags; +} + +static inline struct openflags of_excl(struct openflags flags) +{ + flags.e = 1; + return flags; +} + +static inline struct openflags of_cloexec(struct openflags flags) +{ + flags.cl = 1; + return flags; +} + +/* file.c */ +extern int os_stat_file(const char *file_name, struct uml_stat *buf); +extern int os_stat_fd(const int fd, struct uml_stat *buf); +extern int os_access(const char *file, int mode); +extern int os_set_exec_close(int fd); +extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg); +extern int os_get_ifname(int fd, char *namebuf); +extern int os_set_slip(int fd); +extern int os_mode_fd(int fd, int mode); + +extern int os_seek_file(int fd, unsigned long long offset); +extern int os_open_file(const char *file, struct openflags flags, int mode); +extern int os_read_file(int fd, void *buf, int len); +extern int os_write_file(int fd, const void *buf, int count); +extern int os_file_size(const char *file, unsigned long long *size_out); +extern int os_file_modtime(const char *file, unsigned long *modtime); +extern int os_pipe(int *fd, int stream, int close_on_exec); +extern int os_set_fd_async(int fd); +extern int os_clear_fd_async(int fd); +extern int os_set_fd_block(int fd, int blocking); +extern int os_accept_connection(int fd); +extern int os_create_unix_socket(const char *file, int len, int close_on_exec); +extern int os_shutdown_socket(int fd, int r, int w); +extern void os_close_file(int fd); +extern int os_rcv_fd(int fd, int *helper_pid_out); +extern int create_unix_socket(char *file, int len, int close_on_exec); +extern int os_connect_socket(const char *name); +extern int os_file_type(char *file); +extern int os_file_mode(const char *file, struct openflags *mode_out); +extern int os_lock_file(int fd, int 
excl); +extern void os_flush_stdout(void); +extern int os_stat_filesystem(char *path, long *bsize_out, + long long *blocks_out, long long *bfree_out, + long long *bavail_out, long long *files_out, + long long *ffree_out, void *fsid_out, + int fsid_size, long *namelen_out, + long *spare_out); +extern int os_change_dir(char *dir); +extern int os_fchange_dir(int fd); + +/* start_up.c */ +extern void os_early_checks(void); +extern void can_do_skas(void); +extern void os_check_bugs(void); +extern void check_host_supports_tls(int *supports_tls, int *tls_min); + +/* mem.c */ +extern int create_mem_file(unsigned long long len); + +/* process.c */ +extern unsigned long os_process_pc(int pid); +extern int os_process_parent(int pid); +extern void os_stop_process(int pid); +extern void os_kill_process(int pid, int reap_child); +extern void os_kill_ptraced_process(int pid, int reap_child); +extern long os_ptrace_ldt(long pid, long addr, long data); + +extern int os_getpid(void); +extern int os_getpgrp(void); + +extern void init_new_thread_signals(void); +extern int run_kernel_thread(int (*fn)(void *), void *arg, jmp_buf **jmp_ptr); + +extern int os_map_memory(void *virt, int fd, unsigned long long off, + unsigned long len, int r, int w, int x); +extern int os_protect_memory(void *addr, unsigned long len, + int r, int w, int x); +extern int os_unmap_memory(void *addr, int len); +extern int os_drop_memory(void *addr, int length); +extern int can_drop_memory(void); +extern void os_flush_stdout(void); + +/* uaccess.c */ +extern unsigned long __do_user_copy(void *to, const void *from, int n, + void **fault_addr, jmp_buf **fault_catcher, + void (*op)(void *to, const void *from, + int n), int *faulted_out); + +/* execvp.c */ +extern int execvp_noalloc(char *buf, const char *file, char *const argv[]); +/* helper.c */ +extern int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv); +extern int run_helper_thread(int (*proc)(void *), void *arg, + unsigned int flags, unsigned 
long *stack_out); +extern int helper_wait(int pid); + + +/* tls.c */ +extern int os_set_thread_area(user_desc_t *info, int pid); +extern int os_get_thread_area(user_desc_t *info, int pid); + +/* umid.c */ +extern int umid_file_name(char *name, char *buf, int len); +extern int set_umid(char *name); +extern char *get_umid(void); + +/* signal.c */ +extern void timer_init(void); +extern void set_sigstack(void *sig_stack, int size); +extern void remove_sigstack(void); +extern void set_handler(int sig, void (*handler)(int), int flags, ...); +extern int change_sig(int signal, int on); +extern void block_signals(void); +extern void unblock_signals(void); +extern int get_signals(void); +extern int set_signals(int enable); + +/* util.c */ +extern void stack_protections(unsigned long address); +extern int raw(int fd); +extern void setup_machinename(char *machine_out); +extern void setup_hostinfo(char *buf, int len); +extern void os_dump_core(void) __attribute__ ((noreturn)); + +/* time.c */ +extern void idle_sleep(unsigned long long nsecs); +extern int set_interval(void); +extern int timer_one_shot(int ticks); +extern long long disable_timer(void); +extern void uml_idle_timer(void); +extern long long os_nsecs(void); + +/* skas/mem.c */ +extern long run_syscall_stub(struct mm_id * mm_idp, + int syscall, unsigned long *args, long expected, + void **addr, int done); +extern long syscall_stub_data(struct mm_id * mm_idp, + unsigned long *data, int data_count, + void **addr, void **stub_addr); +extern int map(struct mm_id * mm_idp, unsigned long virt, + unsigned long len, int prot, int phys_fd, + unsigned long long offset, int done, void **data); +extern int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len, + int done, void **data); +extern int protect(struct mm_id * mm_idp, unsigned long addr, + unsigned long len, unsigned int prot, int done, void **data); + +/* skas/process.c */ +extern int is_skas_winch(int pid, int fd, void *data); +extern int 
start_userspace(unsigned long stub_stack); +extern int copy_context_skas0(unsigned long stack, int pid); +extern void userspace(struct uml_pt_regs *regs); +extern int map_stub_pages(int fd, unsigned long code, unsigned long data, + unsigned long stack); +extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)); +extern void switch_threads(jmp_buf *me, jmp_buf *you); +extern int start_idle_thread(void *stack, jmp_buf *switch_buf); +extern void initial_thread_cb_skas(void (*proc)(void *), + void *arg); +extern void halt_skas(void); +extern void reboot_skas(void); + +/* irq.c */ +extern int os_waiting_for_events(struct irq_fd *active_fds); +extern int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds); +extern void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg, + struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2); +extern void os_free_irq_later(struct irq_fd *active_fds, + int irq, void *dev_id); +extern int os_get_pollfd(int i); +extern void os_set_pollfd(int i, int fd); +extern void os_set_ioignore(void); + +/* sigio.c */ +extern int add_sigio_fd(int fd); +extern int ignore_sigio_fd(int fd); +extern void maybe_sigio_broken(int fd, int read); +extern void sigio_broken(int fd, int read); + +/* sys-x86_64/prctl.c */ +extern int os_arch_prctl(int pid, int code, unsigned long *addr); + +/* tty.c */ +extern int get_pty(void); + +/* sys-$ARCH/task_size.c */ +extern unsigned long os_get_top_address(void); + +#endif diff --git a/arch/um/include/shared/process.h b/arch/um/include/shared/process.h new file mode 100644 index 0000000..bb873a5 --- /dev/null +++ b/arch/um/include/shared/process.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2000 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __PROCESS_H__ +#define __PROCESS_H__ + +#include <signal.h> + +/* Copied from linux/compiler-gcc.h since we can't include it directly */ +#define barrier() __asm__ __volatile__("": : 
:"memory") + +extern void sig_handler(int sig, struct sigcontext *sc); +extern void alarm_handler(int sig, struct sigcontext *sc); + +#endif diff --git a/arch/um/include/shared/ptrace_user.h b/arch/um/include/shared/ptrace_user.h new file mode 100644 index 0000000..4bce6e0 --- /dev/null +++ b/arch/um/include/shared/ptrace_user.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __PTRACE_USER_H__ +#define __PTRACE_USER_H__ + +#include "sysdep/ptrace_user.h" + +extern int ptrace_getregs(long pid, unsigned long *regs_out); +extern int ptrace_setregs(long pid, unsigned long *regs_in); + +/* syscall emulation path in ptrace */ + +#ifndef PTRACE_SYSEMU +#define PTRACE_SYSEMU 31 +#endif +#ifndef PTRACE_SYSEMU_SINGLESTEP +#define PTRACE_SYSEMU_SINGLESTEP 32 +#endif + +/* On architectures, that started to support PTRACE_O_TRACESYSGOOD + * in linux 2.4, there are two different definitions of + * PTRACE_SETOPTIONS: linux 2.4 uses 21 while linux 2.6 uses 0x4200. + * For binary compatibility, 2.6 also supports the old "21", named + * PTRACE_OLDSETOPTION. On these architectures, UML always must use + * "21", to ensure the kernel runs on 2.4 and 2.6 host without + * recompilation. So, we use PTRACE_OLDSETOPTIONS in UML. + * We also want to be able to build the kernel on 2.4, which doesn't + * have PTRACE_OLDSETOPTIONS. So, if it is missing, we declare + * PTRACE_OLDSETOPTIONS to to be the same as PTRACE_SETOPTIONS. + * + * On architectures, that start to support PTRACE_O_TRACESYSGOOD on + * linux 2.6, PTRACE_OLDSETOPTIONS never is defined, and also isn't + * supported by the host kernel. In that case, our trick lets us use + * the new 0x4200 with the name PTRACE_OLDSETOPTIONS. 
+ */ +#ifndef PTRACE_OLDSETOPTIONS +#define PTRACE_OLDSETOPTIONS PTRACE_SETOPTIONS +#endif + +void set_using_sysemu(int value); +int get_using_sysemu(void); +extern int sysemu_supported; + +#define SELECT_PTRACE_OPERATION(sysemu_mode, singlestep_mode) \ + (((int[3][3] ) { \ + { PTRACE_SYSCALL, PTRACE_SYSCALL, PTRACE_SINGLESTEP }, \ + { PTRACE_SYSEMU, PTRACE_SYSEMU, PTRACE_SINGLESTEP }, \ + { PTRACE_SYSEMU, PTRACE_SYSEMU_SINGLESTEP, \ + PTRACE_SYSEMU_SINGLESTEP } }) \ + [sysemu_mode][singlestep_mode]) + +#endif diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h new file mode 100644 index 0000000..b0b4589 --- /dev/null +++ b/arch/um/include/shared/registers.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2004 PathScale, Inc + * Licensed under the GPL + */ + +#ifndef __REGISTERS_H +#define __REGISTERS_H + +#include "sysdep/ptrace.h" +#include "sysdep/archsetjmp.h" + +extern int save_fp_registers(int pid, unsigned long *fp_regs); +extern int restore_fp_registers(int pid, unsigned long *fp_regs); +extern int save_fpx_registers(int pid, unsigned long *fp_regs); +extern int restore_fpx_registers(int pid, unsigned long *fp_regs); +extern int save_registers(int pid, struct uml_pt_regs *regs); +extern int restore_registers(int pid, struct uml_pt_regs *regs); +extern int init_registers(int pid); +extern void get_safe_registers(unsigned long *regs); +extern unsigned long get_thread_reg(int reg, jmp_buf *buf); +extern int get_fp_registers(int pid, unsigned long *regs); +extern int put_fp_registers(int pid, unsigned long *regs); + +#endif diff --git a/arch/um/include/shared/sigio.h b/arch/um/include/shared/sigio.h new file mode 100644 index 0000000..434f1a9 --- /dev/null +++ b/arch/um/include/shared/sigio.h @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SIGIO_H__ +#define __SIGIO_H__ + +extern int write_sigio_irq(int fd); +extern int register_sigio_fd(int fd); +extern void 
sigio_lock(void); +extern void sigio_unlock(void); + +#endif diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h new file mode 100644 index 0000000..48dd098 --- /dev/null +++ b/arch/um/include/shared/skas/mm_id.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2005 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __MM_ID_H +#define __MM_ID_H + +struct mm_id { + union { + int mm_fd; + int pid; + } u; + unsigned long stack; +}; + +#endif diff --git a/arch/um/include/shared/skas/proc_mm.h b/arch/um/include/shared/skas/proc_mm.h new file mode 100644 index 0000000..9028092 --- /dev/null +++ b/arch/um/include/shared/skas/proc_mm.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SKAS_PROC_MM_H +#define __SKAS_PROC_MM_H + +#define MM_MMAP 54 +#define MM_MUNMAP 55 +#define MM_MPROTECT 56 +#define MM_COPY_SEGMENTS 57 + +struct mm_mmap { + unsigned long addr; + unsigned long len; + unsigned long prot; + unsigned long flags; + unsigned long fd; + unsigned long offset; +}; + +struct mm_munmap { + unsigned long addr; + unsigned long len; +}; + +struct mm_mprotect { + unsigned long addr; + unsigned long len; + unsigned int prot; +}; + +struct proc_mm_op { + int op; + union { + struct mm_mmap mmap; + struct mm_munmap munmap; + struct mm_mprotect mprotect; + int copy_segments; + } u; +}; + +#endif diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h new file mode 100644 index 0000000..64d2c74 --- /dev/null +++ b/arch/um/include/shared/skas/skas.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __SKAS_H +#define __SKAS_H + +#include "sysdep/ptrace.h" + +extern int userspace_pid[]; +extern int proc_mm, ptrace_faultinfo, ptrace_ldt; +extern int skas_needs_stub; + +extern int user_thread(unsigned long stack, int flags); +extern void 
new_thread_handler(void); +extern void handle_syscall(struct uml_pt_regs *regs); +extern int new_mm(unsigned long stack); +extern long execute_syscall_skas(void *r); +extern unsigned long current_stub_stack(void); + +#endif diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h new file mode 100644 index 0000000..f6ed92c --- /dev/null +++ b/arch/um/include/shared/skas/stub-data.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2005 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __STUB_DATA_H +#define __STUB_DATA_H + +#include <sys/time.h> + +struct stub_data { + long offset; + int fd; + struct itimerval timer; + long err; +}; + +#endif diff --git a/arch/um/include/shared/skas_ptrace.h b/arch/um/include/shared/skas_ptrace.h new file mode 100644 index 0000000..3d31bba --- /dev/null +++ b/arch/um/include/shared/skas_ptrace.h @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __SKAS_PTRACE_H +#define __SKAS_PTRACE_H + +#define PTRACE_FAULTINFO 52 +#define PTRACE_SWITCH_MM 55 + +#include "sysdep/skas_ptrace.h" + +#endif diff --git a/arch/um/include/shared/skas_ptregs.h b/arch/um/include/shared/skas_ptregs.h new file mode 100644 index 0000000..73db19e --- /dev/null +++ b/arch/um/include/shared/skas_ptregs.h @@ -0,0 +1,6 @@ +#ifndef __SKAS_PT_REGS_ +#define __SKAS_PT_REGS_ + +#include <user_constants.h> + +#endif diff --git a/arch/um/include/shared/syscall.h b/arch/um/include/shared/syscall.h new file mode 100644 index 0000000..dda1df9 --- /dev/null +++ b/arch/um/include/shared/syscall.h @@ -0,0 +1,12 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSCALL_USER_H +#define __SYSCALL_USER_H + +extern int record_syscall_start(int syscall); +extern void record_syscall_end(int index, long result); + +#endif diff --git a/arch/um/include/shared/sysrq.h 
b/arch/um/include/shared/sysrq.h new file mode 100644 index 0000000..c8d332b --- /dev/null +++ b/arch/um/include/shared/sysrq.h @@ -0,0 +1,7 @@ +#ifndef __UM_SYSRQ_H +#define __UM_SYSRQ_H + +struct task_struct; +extern void show_trace(struct task_struct* task, unsigned long *stack); + +#endif diff --git a/arch/um/include/shared/task.h b/arch/um/include/shared/task.h new file mode 100644 index 0000000..3fe726b --- /dev/null +++ b/arch/um/include/shared/task.h @@ -0,0 +1,9 @@ +#ifndef __TASK_H +#define __TASK_H + +#include <kern_constants.h> + +#define TASK_REGS(task) ((struct uml_pt_regs *) &(((char *) (task))[HOST_TASK_REGS])) +#define TASK_PID(task) *((int *) &(((char *) (task))[HOST_TASK_PID])) + +#endif diff --git a/arch/um/include/shared/tlb.h b/arch/um/include/shared/tlb.h new file mode 100644 index 0000000..ecd2265 --- /dev/null +++ b/arch/um/include/shared/tlb.h @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __TLB_H__ +#define __TLB_H__ + +#include "um_mmu.h" + +extern void force_flush_all(void); +extern int flush_tlb_kernel_range_common(unsigned long start, + unsigned long end); + +#endif diff --git a/arch/um/include/shared/ubd_user.h b/arch/um/include/shared/ubd_user.h new file mode 100644 index 0000000..bb66517 --- /dev/null +++ b/arch/um/include/shared/ubd_user.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 RidgeRun, Inc (glonnon@ridgerun.com) + * Licensed under the GPL + */ + +#ifndef __UM_UBD_USER_H +#define __UM_UBD_USER_H + +extern void ignore_sigwinch_sig(void); +extern int start_io_thread(unsigned long sp, int *fds_out); +extern int io_thread(void *arg); +extern int kernel_fd; + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/include/shared/um_malloc.h b/arch/um/include/shared/um_malloc.h new file mode 100644 index 0000000..c554d70 --- /dev/null +++ b/arch/um/include/shared/um_malloc.h @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2005 Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> + * Licensed under the GPL + */ + +#ifndef __UM_MALLOC_H__ +#define __UM_MALLOC_H__ + +#include "kern_constants.h" + +extern void *uml_kmalloc(int size, int flags); +extern void kfree(const void *ptr); + +extern void *vmalloc(unsigned long size); +extern void vfree(void *ptr); + +#endif /* __UM_MALLOC_H__ */ + + diff --git a/arch/um/include/shared/um_mmu.h b/arch/um/include/shared/um_mmu.h new file mode 100644 index 0000000..b1a7e47 --- /dev/null +++ b/arch/um/include/shared/um_mmu.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __ARCH_UM_MMU_H +#define __ARCH_UM_MMU_H + +#include "mm_id.h" +#include "ldt.h" + +typedef struct mm_context { + struct mm_id id; + struct uml_ldt ldt; + struct page **stub_pages; +} mm_context_t; + +extern void __switch_mm(struct mm_id * mm_idp); + +/* Avoid tangled inclusion with asm/ldt.h */ +extern long init_new_ldt(struct mm_context *to_mm, struct mm_context *from_mm); +extern void free_ldt(struct mm_context *mm); + +#endif diff --git a/arch/um/include/shared/um_uaccess.h b/arch/um/include/shared/um_uaccess.h new file mode 100644 index 0000000..45c0499 --- /dev/null +++ b/arch/um/include/shared/um_uaccess.h @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __ARCH_UM_UACCESS_H +#define __ARCH_UM_UACCESS_H + +#include <asm/elf.h> +#include <asm/fixmap.h> +#include "sysdep/archsetjmp.h" + +#define __under_task_size(addr, size) \ + (((unsigned 
long) (addr) < TASK_SIZE) && \ + (((unsigned long) (addr) + (size)) < TASK_SIZE)) + +#define __access_ok_vsyscall(type, addr, size) \ + ((type == VERIFY_READ) && \ + ((unsigned long) (addr) >= FIXADDR_USER_START) && \ + ((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \ + ((unsigned long) (addr) + (size) >= (unsigned long)(addr))) + +#define __addr_range_nowrap(addr, size) \ + ((unsigned long) (addr) <= ((unsigned long) (addr) + (size))) + +#define access_ok(type, addr, size) \ + (__addr_range_nowrap(addr, size) && \ + (__under_task_size(addr, size) || \ + __access_ok_vsyscall(type, addr, size) || \ + segment_eq(get_fs(), KERNEL_DS))) + +extern int copy_from_user(void *to, const void __user *from, int n); +extern int copy_to_user(void __user *to, const void *from, int n); + +extern int __do_copy_to_user(void *to, const void *from, int n, + void **fault_addr, jmp_buf **fault_catcher); + +/* + * strncpy_from_user: - Copy a NUL terminated string from userspace. + * @dst: Destination address, in kernel space. This buffer must be at + * least @count bytes long. + * @src: Source address, in user space. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from userspace to kernel space. + * + * On success, returns the length of the string (not including the trailing + * NUL). + * + * If access to userspace fails, returns -EFAULT (some data may have been + * copied). + * + * If @count is smaller than the length of the string, copies @count bytes + * and returns @count. + */ + +extern int strncpy_from_user(char *dst, const char __user *src, int count); + +/* + * __clear_user: - Zero a block of memory in user space, with less checking. + * @to: Destination address, in user space. + * @n: Number of bytes to zero. + * + * Zero a block of memory in user space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be cleared. 
+ * On success, this will be zero. + */ +extern int __clear_user(void __user *mem, int len); + +/* + * clear_user: - Zero a block of memory in user space. + * @to: Destination address, in user space. + * @n: Number of bytes to zero. + * + * Zero a block of memory in user space. + * + * Returns number of bytes that could not be cleared. + * On success, this will be zero. + */ +extern int clear_user(void __user *mem, int len); + +/* + * strlen_user: - Get the size of a string in user space. + * @str: The string to measure. + * @n: The maximum valid length + * + * Get the size of a NUL-terminated string in user space. + * + * Returns the size of the string INCLUDING the terminating NUL. + * On exception, returns 0. + * If the string is too long, returns a value greater than @n. + */ +extern int strnlen_user(const void __user *str, int len); + +#endif diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h new file mode 100644 index 0000000..293f7c7 --- /dev/null +++ b/arch/um/include/shared/user.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __USER_H__ +#define __USER_H__ + +#include "kern_constants.h" + +/* + * The usual definition - copied here because the kernel provides its own, + * fancier, type-safe, definition. Using that one would require + * copying too much infrastructure for my taste, so userspace files + * get less checking than kernel files. + */ +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +/* This is to get size_t */ +#ifdef __KERNEL__ +#include <linux/types.h> +#else +#include <stddef.h> +#endif + +extern void panic(const char *fmt, ...) + __attribute__ ((format (printf, 1, 2))); + +#ifdef UML_CONFIG_PRINTK +extern int printk(const char *fmt, ...) + __attribute__ ((format (printf, 1, 2))); +#else +static inline int printk(const char *fmt, ...) 
+{ + return 0; +} +#endif + +extern void schedule(void); +extern int in_aton(char *str); +extern int open_gdb_chan(void); +extern size_t strlcpy(char *, const char *, size_t); +extern size_t strlcat(char *, const char *, size_t); + +#endif diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile new file mode 100644 index 0000000..499e5e9 --- /dev/null +++ b/arch/um/kernel/Makefile @@ -0,0 +1,44 @@ +# +# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux,intel}.com) +# Licensed under the GPL +# + +extra-y := vmlinux.lds +clean-files := + +obj-y = config.o exec.o exitcode.o init_task.o irq.o ksyms.o mem.o \ + physmem.o process.o ptrace.o reboot.o sigio.o \ + signal.o smp.o syscall.o sysrq.o time.o tlb.o trap.o uaccess.o \ + um_arch.o umid.o skas/ + +obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o +obj-$(CONFIG_GPROF) += gprof_syms.o +obj-$(CONFIG_GCOV) += gmon_syms.o + +USER_OBJS := config.o + +include arch/um/scripts/Makefile.rules + +targets := config.c config.tmp + +# Be careful with the below Sed code - sed is pitfall-rich! +# We use sed to lower build requirements, for "embedded" builders for instance. 
+ +$(obj)/config.tmp: $(objtree)/.config FORCE + $(call if_changed,quote1) + +quiet_cmd_quote1 = QUOTE $@ + cmd_quote1 = sed -e 's/"/\\"/g' -e 's/^/"/' -e 's/$$/\\n"/' \ + $< > $@ + +$(obj)/config.c: $(src)/config.c.in $(obj)/config.tmp FORCE + $(call if_changed,quote2) + +quiet_cmd_quote2 = QUOTE $@ + cmd_quote2 = sed -e '/CONFIG/{' \ + -e 's/"CONFIG"\;/""/' \ + -e 'r $(obj)/config.tmp' \ + -e 'a \' \ + -e '""\;' \ + -e '}' \ + $< > $@ diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c new file mode 100644 index 0000000..91ea538 --- /dev/null +++ b/arch/um/kernel/asm-offsets.c @@ -0,0 +1 @@ +#include "sysdep/kernel-offsets.h" diff --git a/arch/um/kernel/config.c.in b/arch/um/kernel/config.c.in new file mode 100644 index 0000000..c062cbf --- /dev/null +++ b/arch/um/kernel/config.c.in @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <stdlib.h> +#include "init.h" + +static __initdata char *config = "CONFIG"; + +static int __init print_config(char *line, int *add) +{ + printf("%s", config); + exit(0); +} + +__uml_setup("--showconfig", print_config, +"--showconfig\n" +" Prints the config file that this UML binary was generated from.\n\n" +); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S new file mode 100644 index 0000000..9975e1a --- /dev/null +++ b/arch/um/kernel/dyn.lds.S @@ -0,0 +1,159 @@ +#include <asm-generic/vmlinux.lds.h> +#include <asm/page.h> + +OUTPUT_FORMAT(ELF_FORMAT) +OUTPUT_ARCH(ELF_ARCH) +ENTRY(_start) +jiffies = jiffies_64; + +SECTIONS +{ + PROVIDE (__executable_start = START); + . = START + SIZEOF_HEADERS; + .interp : { *(.interp) } + __binary_start = .; + . = ALIGN(4096); /* Init code and data */ + _text = .; + _stext = .; + __init_begin = .; + .init.text : { + _sinittext = .; + INIT_TEXT + _einittext = .; + } + + . = ALIGN(PAGE_SIZE); + + /* Read-only sections, merged into text segment: */ + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .rel.init : { *(.rel.init) } + .rela.init : { *(.rela.init) } + .rel.text : { *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*) } + .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) } + .rel.fini : { *(.rel.fini) } + .rela.fini : { *(.rela.fini) } + .rel.rodata : { *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*) } + .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) } + .rel.data : { *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*) } + .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) } + .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) } + .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) } + .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) } + .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) } + .rel.ctors : { *(.rel.ctors) } + .rela.ctors : { *(.rela.ctors) } + .rel.dtors : { *(.rel.dtors) } + 
.rela.dtors : { *(.rela.dtors) } + .rel.got : { *(.rel.got) } + .rela.got : { *(.rela.got) } + .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) } + .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) } + .rel.plt : { *(.rel.plt) } + .rela.plt : { *(.rela.plt) } + .init : { + KEEP (*(.init)) + } =0x90909090 + .plt : { *(.plt) } + .text : { + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + *(.fixup) + *(.stub .text.* .gnu.linkonce.t.*) + /* .gnu.warning sections are handled specially by elf32.em. */ + *(.gnu.warning) + + . = ALIGN(PAGE_SIZE); + } =0x90909090 + . = ALIGN(PAGE_SIZE); + .syscall_stub : { + __syscall_stub_start = .; + *(.__syscall_stub*) + __syscall_stub_end = .; + } + .fini : { + KEEP (*(.fini)) + } =0x90909090 + + .kstrtab : { *(.kstrtab) } + + #include "asm/common.lds.S" + + init.data : { INIT_DATA } + + /* Ensure the __preinit_array_start label is properly aligned. We + could instead move the label definition inside the section, but + the linker would then create the section even if it turns out to + be empty, which isn't pretty. */ + . = ALIGN(32 / 8); + .preinit_array : { *(.preinit_array) } + .init_array : { *(.init_array) } + .fini_array : { *(.fini_array) } + .data : { + . = ALIGN(KERNEL_STACK_SIZE); /* init_task */ + *(.data.init_task) + . = ALIGN(KERNEL_STACK_SIZE); + *(.data.init_irqstack) + DATA_DATA + *(.data.* .gnu.linkonce.d.*) + SORT(CONSTRUCTORS) + } + .data1 : { *(.data1) } + .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) } + .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) } + .eh_frame : { KEEP (*(.eh_frame)) } + .gcc_except_table : { *(.gcc_except_table) } + .dynamic : { *(.dynamic) } + .ctors : { + /* gcc uses crtbegin.o to find the start of + the constructors, so we make sure it is + first. Because this is a wildcard, it + doesn't matter if the user does not + actually link against crtbegin.o; the + linker won't look for a file to match a + wildcard. 
The wildcard also means that it + doesn't matter which directory crtbegin.o + is in. */ + KEEP (*crtbegin.o(.ctors)) + /* We don't want to include the .ctor section from + from the crtend.o file until after the sorted ctors. + The .ctor section from the crtend file contains the + end of ctors marker and it must be last */ + KEEP (*(EXCLUDE_FILE (*crtend.o ) .ctors)) + KEEP (*(SORT(.ctors.*))) + KEEP (*(.ctors)) + } + .dtors : { + KEEP (*crtbegin.o(.dtors)) + KEEP (*(EXCLUDE_FILE (*crtend.o ) .dtors)) + KEEP (*(SORT(.dtors.*))) + KEEP (*(.dtors)) + } + .jcr : { KEEP (*(.jcr)) } + .got : { *(.got.plt) *(.got) } + _edata = .; + PROVIDE (edata = .); + .bss : { + __bss_start = .; + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + *(COMMON) + /* Align here to ensure that the .bss section occupies space up to + _end. Align after .bss to ensure correct alignment even if the + .bss section disappears because there are no input sections. */ + . = ALIGN(32 / 8); + . = ALIGN(32 / 8); + } + _end = .; + PROVIDE (end = .); + + STABS_DEBUG + + DWARF_DEBUG +} diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c new file mode 100644 index 0000000..fda30d2 --- /dev/null +++ b/arch/um/kernel/exec.c @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "linux/stddef.h" +#include "linux/fs.h" +#include "linux/smp_lock.h" +#include "linux/ptrace.h" +#include "linux/sched.h" +#include "asm/current.h" +#include "asm/processor.h" +#include "asm/uaccess.h" +#include "as-layout.h" +#include "mem_user.h" +#include "skas.h" +#include "os.h" +#include "internal.h" + +void flush_thread(void) +{ + void *data = NULL; + int ret; + + arch_flush_thread(¤t->thread.arch); + + ret = unmap(¤t->mm->context.id, 0, STUB_START, 0, &data); + ret = ret || unmap(¤t->mm->context.id, STUB_END, + host_task_size - STUB_END, 1, &data); + if (ret) { + printk(KERN_ERR "flush_thread - clearing address space failed, " + "err = 
%d\n", ret); + force_sig(SIGKILL, current); + } + + __switch_mm(¤t->mm->context.id); +} + +void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) +{ + set_fs(USER_DS); + PT_REGS_IP(regs) = eip; + PT_REGS_SP(regs) = esp; +} + +static long execve1(char *file, char __user * __user *argv, + char __user *__user *env) +{ + long error; + + error = do_execve(file, argv, env, ¤t->thread.regs); + if (error == 0) { + task_lock(current); + current->ptrace &= ~PT_DTRACE; +#ifdef SUBARCH_EXECVE1 + SUBARCH_EXECVE1(¤t->thread.regs.regs); +#endif + task_unlock(current); + } + return error; +} + +long um_execve(char *file, char __user *__user *argv, char __user *__user *env) +{ + long err; + + err = execve1(file, argv, env); + if (!err) + UML_LONGJMP(current->thread.exec_buf, 1); + return err; +} + +long sys_execve(char __user *file, char __user *__user *argv, + char __user *__user *env) +{ + long error; + char *filename; + + lock_kernel(); + filename = getname(file); + error = PTR_ERR(filename); + if (IS_ERR(filename)) goto out; + error = execve1(filename, argv, env); + putname(filename); + out: + unlock_kernel(); + return error; +} diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c new file mode 100644 index 0000000..6540d2c --- /dev/null +++ b/arch/um/kernel/exitcode.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/ctype.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/proc_fs.h> +#include <linux/types.h> +#include <asm/uaccess.h> + +/* + * If read and write race, the read will still atomically read a valid + * value. + */ +int uml_exitcode = 0; + +static int read_proc_exitcode(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len, val; + + /* + * Save uml_exitcode in a local so that we don't need to guarantee + * that sprintf accesses it atomically. 
+ */ + val = uml_exitcode; + len = sprintf(page, "%d\n", val); + len -= off; + if (len <= off+count) + *eof = 1; + *start = page + off; + if (len > count) + len = count; + if (len < 0) + len = 0; + return len; +} + +/* + * Parse an integer written to /proc/exitcode and store it in uml_exitcode. + * Returns count on success, -EFAULT if the user buffer cannot be read, or + * -EINVAL if the number is followed by anything other than whitespace. + */ +static int write_proc_exitcode(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char *end, buf[sizeof("nnnnn\0")]; + size_t size = count; + int tmp; + + /* + * count is chosen by the writer: clamp the copy so it cannot overrun + * buf, and terminate the string before simple_strtol() parses it. + */ + if (size > sizeof(buf) - 1) + size = sizeof(buf) - 1; + if (copy_from_user(buf, buffer, size)) + return -EFAULT; + buf[size] = '\0'; + + tmp = simple_strtol(buf, &end, 0); + if ((*end != '\0') && !isspace(*end)) + return -EINVAL; + + uml_exitcode = tmp; + return count; +} + +static int make_proc_exitcode(void) +{ + struct proc_dir_entry *ent; + + ent = create_proc_entry("exitcode", 0600, NULL); + if (ent == NULL) { + printk(KERN_WARNING "make_proc_exitcode : Failed to register " + "/proc/exitcode\n"); + return 0; + } + + ent->read_proc = read_proc_exitcode; + ent->write_proc = write_proc_exitcode; + + return 0; +} + +__initcall(make_proc_exitcode); diff --git a/arch/um/kernel/gmon_syms.c b/arch/um/kernel/gmon_syms.c new file mode 100644 index 0000000..72eccd2 --- /dev/null +++ b/arch/um/kernel/gmon_syms.c @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "linux/module.h" + +extern void __bb_init_func(void *) __attribute__((weak)); +EXPORT_SYMBOL(__bb_init_func); + +/* + * This is defined (and referred to in profiling stub code) only by some GCC + * versions in libgcov. + * + * Since SuSE backported the fix, we cannot handle it depending on GCC version. + * So, unconditionally export it. But also give it a weak declaration, which + * will be overridden by any other one. 
+ */ + +extern void __gcov_init(void *) __attribute__((weak)); +EXPORT_SYMBOL(__gcov_init); + +extern void __gcov_merge_add(void *) __attribute__((weak)); +EXPORT_SYMBOL(__gcov_merge_add); diff --git a/arch/um/kernel/gprof_syms.c b/arch/um/kernel/gprof_syms.c new file mode 100644 index 0000000..e2f043d --- /dev/null +++ b/arch/um/kernel/gprof_syms.c @@ -0,0 +1,9 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "linux/module.h" + +extern void mcount(void); +EXPORT_SYMBOL(mcount); diff --git a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c new file mode 100644 index 0000000..910eda8 --- /dev/null +++ b/arch/um/kernel/init_task.c @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,intel.linux}.com) + * Licensed under the GPL + */ + +#include "linux/sched.h" +#include "linux/init_task.h" +#include "linux/fs.h" +#include "linux/module.h" +#include "linux/mqueue.h" +#include "asm/uaccess.h" + +static struct fs_struct init_fs = INIT_FS; +struct mm_struct init_mm = INIT_MM(init_mm); +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); +EXPORT_SYMBOL(init_mm); + +/* + * Initial task structure. + * + * All other task structs will be allocated on slabs in fork.c + */ + +struct task_struct init_task = INIT_TASK(init_task); + +EXPORT_SYMBOL(init_task); + +/* + * Initial thread structure. + * + * We need to make sure that this is aligned due to the + * way process stacks are handled. This is done by having a special + * "init_task" linker map entry.. 
+ */ + +union thread_union init_thread_union + __attribute__((__section__(".data.init_task"))) = + { INIT_THREAD_INFO(init_task) }; + +union thread_union cpu0_irqstack + __attribute__((__section__(".data.init_irqstack"))) = + { INIT_THREAD_INFO(init_task) }; diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c new file mode 100644 index 0000000..d386c75 --- /dev/null +++ b/arch/um/kernel/initrd.c @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "linux/init.h" +#include "linux/bootmem.h" +#include "linux/initrd.h" +#include "asm/types.h" +#include "initrd.h" +#include "init.h" +#include "os.h" + +/* Changed by uml_initrd_setup, which is a setup */ +static char *initrd __initdata = NULL; + +static int __init read_initrd(void) +{ + void *area; + long long size; + int err; + + if (initrd == NULL) + return 0; + + err = os_file_size(initrd, &size); + if (err) + return 0; + + /* + * This is necessary because alloc_bootmem craps out if you + * ask for no memory. + */ + if (size == 0) { + printk(KERN_ERR "\"%s\" is a zero-size initrd\n", initrd); + return 0; + } + + area = alloc_bootmem(size); + if (area == NULL) + return 0; + + if (load_initrd(initrd, area, size) == -1) + return 0; + + initrd_start = (unsigned long) area; + initrd_end = initrd_start + size; + return 0; +} + +__uml_postsetup(read_initrd); + +static int __init uml_initrd_setup(char *line, int *add) +{ + initrd = line; + return 0; +} + +__uml_setup("initrd=", uml_initrd_setup, +"initrd=<initrd image>\n" +" This is used to boot UML from an initrd image. 
The argument is the\n" +" name of the file containing the image.\n\n" +); + +int load_initrd(char *filename, void *buf, int size) +{ + int fd, n; + + fd = os_open_file(filename, of_read(OPENFLAGS()), 0); + if (fd < 0) { + printk(KERN_ERR "Opening '%s' failed - err = %d\n", filename, + -fd); + return -1; + } + n = os_read_file(fd, buf, size); + if (n != size) { + printk(KERN_ERR "Read of %d bytes from '%s' failed, " + "err = %d\n", size, + filename, -n); + return -1; + } + + os_close_file(fd); + return 0; +} diff --git a/arch/um/kernel/internal.h b/arch/um/kernel/internal.h new file mode 100644 index 0000000..3bda43c --- /dev/null +++ b/arch/um/kernel/internal.h @@ -0,0 +1 @@ +extern long um_execve(char *file, char __user *__user *argv, char __user *__user *env); diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c new file mode 100644 index 0000000..3d7aad0 --- /dev/null +++ b/arch/um/kernel/irq.c @@ -0,0 +1,516 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + * Derived (i.e. 
mostly copied) from arch/i386/kernel/irq.c: + * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar + */ + +#include "linux/cpumask.h" +#include "linux/hardirq.h" +#include "linux/interrupt.h" +#include "linux/kernel_stat.h" +#include "linux/module.h" +#include "linux/seq_file.h" +#include "as-layout.h" +#include "kern_util.h" +#include "os.h" + +/* + * Generic, controller-independent functions: + */ + +int show_interrupts(struct seq_file *p, void *v) +{ + int i = *(loff_t *) v, j; + struct irqaction * action; + unsigned long flags; + + if (i == 0) { + seq_printf(p, " "); + for_each_online_cpu(j) + seq_printf(p, "CPU%d ",j); + seq_putc(p, '\n'); + } + + if (i < NR_IRQS) { + spin_lock_irqsave(&irq_desc[i].lock, flags); + action = irq_desc[i].action; + if (!action) + goto skip; + seq_printf(p, "%3d: ",i); +#ifndef CONFIG_SMP + seq_printf(p, "%10u ", kstat_irqs(i)); +#else + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); +#endif + seq_printf(p, " %14s", irq_desc[i].chip->typename); + seq_printf(p, " %s", action->name); + + for (action=action->next; action; action = action->next) + seq_printf(p, ", %s", action->name); + + seq_putc(p, '\n'); +skip: + spin_unlock_irqrestore(&irq_desc[i].lock, flags); + } else if (i == NR_IRQS) + seq_putc(p, '\n'); + + return 0; +} + +/* + * This list is accessed under irq_lock, except in sigio_handler, + * where it is safe from being modified. IRQ handlers won't change it - + * if an IRQ source has vanished, it will be freed by free_irqs just + * before returning from sigio_handler. That will process a separate + * list of irqs to free, with its own locking, coming back here to + * remove list elements, taking the irq_lock to do so. 
+ */ +static struct irq_fd *active_fds = NULL; +static struct irq_fd **last_irq_ptr = &active_fds; + +extern void free_irqs(void); + +void sigio_handler(int sig, struct uml_pt_regs *regs) +{ + struct irq_fd *irq_fd; + int n; + + if (smp_sigio_handler()) + return; + + while (1) { + n = os_waiting_for_events(active_fds); + if (n <= 0) { + if (n == -EINTR) + continue; + else break; + } + + for (irq_fd = active_fds; irq_fd != NULL; + irq_fd = irq_fd->next) { + if (irq_fd->current_events != 0) { + irq_fd->current_events = 0; + do_IRQ(irq_fd->irq, regs); + } + } + } + + free_irqs(); +} + +static DEFINE_SPINLOCK(irq_lock); + +static int activate_fd(int irq, int fd, int type, void *dev_id) +{ + struct pollfd *tmp_pfd; + struct irq_fd *new_fd, *irq_fd; + unsigned long flags; + int events, err, n; + + err = os_set_fd_async(fd); + if (err < 0) + goto out; + + err = -ENOMEM; + new_fd = kmalloc(sizeof(struct irq_fd), GFP_KERNEL); + if (new_fd == NULL) + goto out; + + if (type == IRQ_READ) + events = UM_POLLIN | UM_POLLPRI; + else events = UM_POLLOUT; + *new_fd = ((struct irq_fd) { .next = NULL, + .id = dev_id, + .fd = fd, + .type = type, + .irq = irq, + .events = events, + .current_events = 0 } ); + + err = -EBUSY; + spin_lock_irqsave(&irq_lock, flags); + for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) { + if ((irq_fd->fd == fd) && (irq_fd->type == type)) { + printk(KERN_ERR "Registering fd %d twice\n", fd); + printk(KERN_ERR "Irqs : %d, %d\n", irq_fd->irq, irq); + printk(KERN_ERR "Ids : 0x%p, 0x%p\n", irq_fd->id, + dev_id); + goto out_unlock; + } + } + + if (type == IRQ_WRITE) + fd = -1; + + tmp_pfd = NULL; + n = 0; + + while (1) { + n = os_create_pollfd(fd, events, tmp_pfd, n); + if (n == 0) + break; + + /* + * n > 0 + * It means we couldn't put new pollfd to current pollfds + * and tmp_fds is NULL or too small for new pollfds array. + * Needed size is equal to n as minimum. 
+ * + * Here we have to drop the lock in order to call + * kmalloc, which might sleep. + * If something else came in and changed the pollfds array + * so we will not be able to put new pollfd struct to pollfds + * then we free the buffer tmp_fds and try again. + */ + spin_unlock_irqrestore(&irq_lock, flags); + kfree(tmp_pfd); + + tmp_pfd = kmalloc(n, GFP_KERNEL); + if (tmp_pfd == NULL) + goto out_kfree; + + spin_lock_irqsave(&irq_lock, flags); + } + + *last_irq_ptr = new_fd; + last_irq_ptr = &new_fd->next; + + spin_unlock_irqrestore(&irq_lock, flags); + + /* + * This calls activate_fd, so it has to be outside the critical + * section. + */ + maybe_sigio_broken(fd, (type == IRQ_READ)); + + return 0; + + out_unlock: + spin_unlock_irqrestore(&irq_lock, flags); + out_kfree: + kfree(new_fd); + out: + return err; +} + +static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg) +{ + unsigned long flags; + + spin_lock_irqsave(&irq_lock, flags); + os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr); + spin_unlock_irqrestore(&irq_lock, flags); +} + +struct irq_and_dev { + int irq; + void *dev; +}; + +static int same_irq_and_dev(struct irq_fd *irq, void *d) +{ + struct irq_and_dev *data = d; + + return ((irq->irq == data->irq) && (irq->id == data->dev)); +} + +static void free_irq_by_irq_and_dev(unsigned int irq, void *dev) +{ + struct irq_and_dev data = ((struct irq_and_dev) { .irq = irq, + .dev = dev }); + + free_irq_by_cb(same_irq_and_dev, &data); +} + +static int same_fd(struct irq_fd *irq, void *fd) +{ + return (irq->fd == *((int *)fd)); +} + +void free_irq_by_fd(int fd) +{ + free_irq_by_cb(same_fd, &fd); +} + +/* Must be called with irq_lock held */ +static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out) +{ + struct irq_fd *irq; + int i = 0; + int fdi; + + for (irq = active_fds; irq != NULL; irq = irq->next) { + if ((irq->fd == fd) && (irq->irq == irqnum)) + break; + i++; + } + if (irq == NULL) { + printk(KERN_ERR "find_irq_by_fd 
doesn't have descriptor %d\n", + fd); + goto out; + } + fdi = os_get_pollfd(i); + if ((fdi != -1) && (fdi != fd)) { + printk(KERN_ERR "find_irq_by_fd - mismatch between active_fds " + "and pollfds, fd %d vs %d, need %d\n", irq->fd, + fdi, fd); + irq = NULL; + goto out; + } + *index_out = i; + out: + return irq; +} + +void reactivate_fd(int fd, int irqnum) +{ + struct irq_fd *irq; + unsigned long flags; + int i; + + spin_lock_irqsave(&irq_lock, flags); + irq = find_irq_by_fd(fd, irqnum, &i); + if (irq == NULL) { + spin_unlock_irqrestore(&irq_lock, flags); + return; + } + os_set_pollfd(i, irq->fd); + spin_unlock_irqrestore(&irq_lock, flags); + + add_sigio_fd(fd); +} + +void deactivate_fd(int fd, int irqnum) +{ + struct irq_fd *irq; + unsigned long flags; + int i; + + spin_lock_irqsave(&irq_lock, flags); + irq = find_irq_by_fd(fd, irqnum, &i); + if (irq == NULL) { + spin_unlock_irqrestore(&irq_lock, flags); + return; + } + + os_set_pollfd(i, -1); + spin_unlock_irqrestore(&irq_lock, flags); + + ignore_sigio_fd(fd); +} + +/* + * Called just before shutdown in order to provide a clean exec + * environment in case the system is rebooting. No locking because + * that would cause a pointless shutdown hang if something hadn't + * released the lock. + */ +int deactivate_all_fds(void) +{ + struct irq_fd *irq; + int err; + + for (irq = active_fds; irq != NULL; irq = irq->next) { + err = os_clear_fd_async(irq->fd); + if (err) + return err; + } + /* If there is a signal already queued, after unblocking ignore it */ + os_set_ioignore(); + + return 0; +} + +/* + * do_IRQ handles all normal device IRQs (the special + * SMP cross-CPU interrupts have their own specific + * handlers). 
+ */ +unsigned int do_IRQ(int irq, struct uml_pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs); + irq_enter(); + __do_IRQ(irq); + irq_exit(); + set_irq_regs(old_regs); + return 1; +} + +int um_request_irq(unsigned int irq, int fd, int type, + irq_handler_t handler, + unsigned long irqflags, const char * devname, + void *dev_id) +{ + int err; + + if (fd != -1) { + err = activate_fd(irq, fd, type, dev_id); + if (err) + return err; + } + + return request_irq(irq, handler, irqflags, devname, dev_id); +} + +EXPORT_SYMBOL(um_request_irq); +EXPORT_SYMBOL(reactivate_fd); + +/* + * hw_interrupt_type must define (startup || enable) && + * (shutdown || disable) && end + */ +static void dummy(unsigned int irq) +{ +} + +/* This is used for everything else than the timer. */ +static struct hw_interrupt_type normal_irq_type = { + .typename = "SIGIO", + .release = free_irq_by_irq_and_dev, + .disable = dummy, + .enable = dummy, + .ack = dummy, + .end = dummy +}; + +static struct hw_interrupt_type SIGVTALRM_irq_type = { + .typename = "SIGVTALRM", + .release = free_irq_by_irq_and_dev, + .shutdown = dummy, /* never called */ + .disable = dummy, + .enable = dummy, + .ack = dummy, + .end = dummy +}; + +void __init init_IRQ(void) +{ + int i; + + irq_desc[TIMER_IRQ].status = IRQ_DISABLED; + irq_desc[TIMER_IRQ].action = NULL; + irq_desc[TIMER_IRQ].depth = 1; + irq_desc[TIMER_IRQ].chip = &SIGVTALRM_irq_type; + enable_irq(TIMER_IRQ); + for (i = 1; i < NR_IRQS; i++) { + irq_desc[i].status = IRQ_DISABLED; + irq_desc[i].action = NULL; + irq_desc[i].depth = 1; + irq_desc[i].chip = &normal_irq_type; + enable_irq(i); + } +} + +/* + * IRQ stack entry and exit: + * + * Unlike i386, UML doesn't receive IRQs on the normal kernel stack + * and switch over to the IRQ stack after some preparation. We use + * sigaltstack to receive signals on a separate stack from the start. 
+ * These two functions make sure the rest of the kernel won't be too + * upset by being on a different stack. The IRQ stack has a + * thread_info structure at the bottom so that current et al continue + * to work. + * + * to_irq_stack copies the current task's thread_info to the IRQ stack + * thread_info and sets the task's stack to point to the IRQ stack. + * + * from_irq_stack copies the thread_info struct back (flags may have + * been modified) and resets the task's stack pointer. + * + * Tricky bits - + * + * What happens when two signals race each other? UML doesn't block + * signals with sigprocmask, SA_DEFER, or sa_mask, so a second signal + * could arrive while a previous one is still setting up the + * thread_info. + * + * There are three cases - + * The first interrupt on the stack - sets up the thread_info and + * handles the interrupt + * A nested interrupt interrupting the copying of the thread_info - + * can't handle the interrupt, as the stack is in an unknown state + * A nested interrupt not interrupting the copying of the + * thread_info - doesn't do any setup, just handles the interrupt + * + * The first job is to figure out whether we interrupted stack setup. + * This is done by xchging the signal mask with thread_info->pending. + * If the value that comes back is zero, then there is no setup in + * progress, and the interrupt can be handled. If the value is + * non-zero, then there is stack setup in progress. In order to have + * the interrupt handled, we leave our signal in the mask, and it will + * be handled by the upper handler after it has set up the stack. + * + * Next is to figure out whether we are the outer handler or a nested + * one. As part of setting up the stack, thread_info->real_thread is + * set to non-NULL (and is reset to NULL on exit). This is the + * nesting indicator. If it is non-NULL, then the stack is already + * set up and the handler can run. 
+ */ + +static unsigned long pending_mask; + +unsigned long to_irq_stack(unsigned long *mask_out) +{ + struct thread_info *ti; + unsigned long mask, old; + int nested; + + mask = xchg(&pending_mask, *mask_out); + if (mask != 0) { + /* + * If any interrupts come in at this point, we want to + * make sure that their bits aren't lost by our + * putting our bit in. So, this loop accumulates bits + * until xchg returns the same value that we put in. + * When that happens, there were no new interrupts, + * and pending_mask contains a bit for each interrupt + * that came in. + */ + old = *mask_out; + do { + old |= mask; + mask = xchg(&pending_mask, old); + } while (mask != old); + return 1; + } + + ti = current_thread_info(); + nested = (ti->real_thread != NULL); + if (!nested) { + struct task_struct *task; + struct thread_info *tti; + + task = cpu_tasks[ti->cpu].task; + tti = task_thread_info(task); + + *ti = *tti; + ti->real_thread = tti; + task->stack = ti; + } + + mask = xchg(&pending_mask, 0); + *mask_out |= mask | nested; + return 0; +} + +unsigned long from_irq_stack(int nested) +{ + struct thread_info *ti, *to; + unsigned long mask; + + ti = current_thread_info(); + + pending_mask = 1; + + to = ti->real_thread; + current->stack = to; + ti->real_thread = NULL; + *to = *ti; + + mask = xchg(&pending_mask, 0); + return mask & ~1; +} + diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c new file mode 100644 index 0000000..836fc9b --- /dev/null +++ b/arch/um/kernel/ksyms.c @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "linux/module.h" +#include "linux/syscalls.h" +#include "asm/tlbflush.h" +#include "asm/uaccess.h" +#include "as-layout.h" +#include "kern_util.h" +#include "mem_user.h" +#include "os.h" + +EXPORT_SYMBOL(uml_physmem); +EXPORT_SYMBOL(set_signals); +EXPORT_SYMBOL(get_signals); +EXPORT_SYMBOL(kernel_thread); +EXPORT_SYMBOL(sys_waitpid); 
+EXPORT_SYMBOL(flush_tlb_range); + +EXPORT_SYMBOL(high_physmem); +EXPORT_SYMBOL(empty_zero_page); +EXPORT_SYMBOL(handle_page_fault); +EXPORT_SYMBOL(find_iomem); + +EXPORT_SYMBOL(strnlen_user); +EXPORT_SYMBOL(strncpy_from_user); +EXPORT_SYMBOL(copy_to_user); +EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(clear_user); +EXPORT_SYMBOL(uml_strdup); + +EXPORT_SYMBOL(os_stat_fd); +EXPORT_SYMBOL(os_stat_file); +EXPORT_SYMBOL(os_access); +EXPORT_SYMBOL(os_set_exec_close); +EXPORT_SYMBOL(os_getpid); +EXPORT_SYMBOL(os_open_file); +EXPORT_SYMBOL(os_read_file); +EXPORT_SYMBOL(os_write_file); +EXPORT_SYMBOL(os_seek_file); +EXPORT_SYMBOL(os_lock_file); +EXPORT_SYMBOL(os_ioctl_generic); +EXPORT_SYMBOL(os_pipe); +EXPORT_SYMBOL(os_file_type); +EXPORT_SYMBOL(os_file_mode); +EXPORT_SYMBOL(os_file_size); +EXPORT_SYMBOL(os_flush_stdout); +EXPORT_SYMBOL(os_close_file); +EXPORT_SYMBOL(os_set_fd_async); +EXPORT_SYMBOL(os_set_fd_block); +EXPORT_SYMBOL(helper_wait); +EXPORT_SYMBOL(os_shutdown_socket); +EXPORT_SYMBOL(os_create_unix_socket); +EXPORT_SYMBOL(os_connect_socket); +EXPORT_SYMBOL(os_accept_connection); +EXPORT_SYMBOL(os_rcv_fd); +EXPORT_SYMBOL(run_helper); +EXPORT_SYMBOL(start_thread); + +EXPORT_SYMBOL(add_sigio_fd); +EXPORT_SYMBOL(ignore_sigio_fd); +EXPORT_SYMBOL(deactivate_fd); +EXPORT_SYMBOL(sigio_broken); + +#ifdef CONFIG_SMP + +/* required for SMP */ + +extern void __write_lock_failed(rwlock_t *rw); +EXPORT_SYMBOL(__write_lock_failed); + +extern void __read_lock_failed(rwlock_t *rw); +EXPORT_SYMBOL(__read_lock_failed); + +#endif diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c new file mode 100644 index 0000000..61d7e61 --- /dev/null +++ b/arch/um/kernel/mem.c @@ -0,0 +1,320 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/stddef.h> +#include <linux/bootmem.h> +#include <linux/gfp.h> +#include <linux/highmem.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <asm/fixmap.h> 
+#include <asm/page.h> +#include "as-layout.h" +#include "init.h" +#include "kern.h" +#include "kern_util.h" +#include "mem_user.h" +#include "os.h" + +/* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */ +unsigned long *empty_zero_page = NULL; +/* allocated in paging_init and unchanged thereafter */ +static unsigned long *empty_bad_page = NULL; + +/* + * Initialized during boot, and readonly for initializing page tables + * afterwards + */ +pgd_t swapper_pg_dir[PTRS_PER_PGD]; + +/* Initialized at boot time, and readonly after that */ +unsigned long long highmem; +int kmalloc_ok = 0; + +/* Used during early boot */ +static unsigned long brk_end; + +#ifdef CONFIG_HIGHMEM +static void setup_highmem(unsigned long highmem_start, + unsigned long highmem_len) +{ + struct page *page; + unsigned long highmem_pfn; + int i; + + highmem_pfn = __pa(highmem_start) >> PAGE_SHIFT; + for (i = 0; i < highmem_len >> PAGE_SHIFT; i++) { + page = &mem_map[highmem_pfn + i]; + ClearPageReserved(page); + init_page_count(page); + __free_page(page); + } +} +#endif + +void __init mem_init(void) +{ + /* clear the zero-page */ + memset(empty_zero_page, 0, PAGE_SIZE); + + /* Map in the area just after the brk now that kmalloc is about + * to be turned on. 
+ */ + brk_end = (unsigned long) UML_ROUND_UP(sbrk(0)); + map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); + free_bootmem(__pa(brk_end), uml_reserved - brk_end); + uml_reserved = brk_end; + + /* this will put all low memory onto the freelists */ + totalram_pages = free_all_bootmem(); + max_low_pfn = totalram_pages; +#ifdef CONFIG_HIGHMEM + totalhigh_pages = highmem >> PAGE_SHIFT; + totalram_pages += totalhigh_pages; +#endif + num_physpages = totalram_pages; + max_pfn = totalram_pages; + printk(KERN_INFO "Memory: %luk available\n", + (unsigned long) nr_free_pages() << (PAGE_SHIFT-10)); + kmalloc_ok = 1; + +#ifdef CONFIG_HIGHMEM + setup_highmem(end_iomem, highmem); +#endif +} + +/* + * Create a page table and place a pointer to it in a middle page + * directory entry. + */ +static void __init one_page_table_init(pmd_t *pmd) +{ + if (pmd_none(*pmd)) { + pte_t *pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pmd(pmd, __pmd(_KERNPG_TABLE + + (unsigned long) __pa(pte))); + if (pte != pte_offset_kernel(pmd, 0)) + BUG(); + } +} + +static void __init one_md_table_init(pud_t *pud) +{ +#ifdef CONFIG_3_LEVEL_PGTABLES + pmd_t *pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); + set_pud(pud, __pud(_KERNPG_TABLE + (unsigned long) __pa(pmd_table))); + if (pmd_table != pmd_offset(pud, 0)) + BUG(); +#endif +} + +static void __init fixrange_init(unsigned long start, unsigned long end, + pgd_t *pgd_base) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + int i, j; + unsigned long vaddr; + + vaddr = start; + i = pgd_index(vaddr); + j = pmd_index(vaddr); + pgd = pgd_base + i; + + for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { + pud = pud_offset(pgd, vaddr); + if (pud_none(*pud)) + one_md_table_init(pud); + pmd = pmd_offset(pud, vaddr); + for (; (j < PTRS_PER_PMD) && (vaddr < end); pmd++, j++) { + one_page_table_init(pmd); + vaddr += PMD_SIZE; + } + j = 0; + } +} + +#ifdef CONFIG_HIGHMEM +pte_t *kmap_pte; +pgprot_t kmap_prot; + +#define 
kmap_get_fixmap_pte(vaddr) \ + pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)),\ + (vaddr)), (vaddr)) + +static void __init kmap_init(void) +{ + unsigned long kmap_vstart; + + /* cache the first kmap pte */ + kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); + kmap_pte = kmap_get_fixmap_pte(kmap_vstart); + + kmap_prot = PAGE_KERNEL; +} + +static void __init init_highmem(void) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long vaddr; + + /* + * Permanent kmaps: + */ + vaddr = PKMAP_BASE; + fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, swapper_pg_dir); + + pgd = swapper_pg_dir + pgd_index(vaddr); + pud = pud_offset(pgd, vaddr); + pmd = pmd_offset(pud, vaddr); + pte = pte_offset_kernel(pmd, vaddr); + pkmap_page_table = pte; + + kmap_init(); +} +#endif /* CONFIG_HIGHMEM */ + +static void __init fixaddr_user_init( void) +{ +#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA + long size = FIXADDR_USER_END - FIXADDR_USER_START; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + phys_t p; + unsigned long v, vaddr = FIXADDR_USER_START; + + if (!size) + return; + + fixrange_init( FIXADDR_USER_START, FIXADDR_USER_END, swapper_pg_dir); + v = (unsigned long) alloc_bootmem_low_pages(size); + memcpy((void *) v , (void *) FIXADDR_USER_START, size); + p = __pa(v); + for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE, + p += PAGE_SIZE) { + pgd = swapper_pg_dir + pgd_index(vaddr); + pud = pud_offset(pgd, vaddr); + pmd = pmd_offset(pud, vaddr); + pte = pte_offset_kernel(pmd, vaddr); + pte_set_val(*pte, p, PAGE_READONLY); + } +#endif +} + +void __init paging_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES], vaddr; + int i; + + empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); + empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); + for (i = 0; i < ARRAY_SIZE(zones_size); i++) + zones_size[i] = 0; + + zones_size[ZONE_NORMAL] = (end_iomem >> PAGE_SHIFT) - + (uml_physmem >> PAGE_SHIFT); 
+#ifdef CONFIG_HIGHMEM + zones_size[ZONE_HIGHMEM] = highmem >> PAGE_SHIFT; +#endif + free_area_init(zones_size); + + /* + * Fixed mappings, only the page table structure has to be + * created - mappings will be set by set_fixmap(): + */ + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; + fixrange_init(vaddr, FIXADDR_TOP, swapper_pg_dir); + + fixaddr_user_init(); + +#ifdef CONFIG_HIGHMEM + init_highmem(); +#endif +} + +/* + * This can't do anything because nothing in the kernel image can be freed + * since it's not in kernel physical memory. + */ + +void free_initmem(void) +{ +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start < end) + printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", + (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + init_page_count(virt_to_page(start)); + free_page(start); + totalram_pages++; + } +} +#endif + +/* Allocate and free page tables. 
*/ + +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); + + if (pgd) { + memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); + memcpy(pgd + USER_PTRS_PER_PGD, + swapper_pg_dir + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + } + return pgd; +} + +void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + free_page((unsigned long) pgd); +} + +pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +{ + pte_t *pte; + + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + return pte; +} + +pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + struct page *pte; + + pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + if (pte) + pgtable_page_ctor(pte); + return pte; +} + +#ifdef CONFIG_3_LEVEL_PGTABLES +pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) +{ + pmd_t *pmd = (pmd_t *) __get_free_page(GFP_KERNEL); + + if (pmd) + memset(pmd, 0, PAGE_SIZE); + + return pmd; +} +#endif + +void *uml_kmalloc(int size, int flags) +{ + return kmalloc(size, flags); +} diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c new file mode 100644 index 0000000..a1a9090 --- /dev/null +++ b/arch/um/kernel/physmem.c @@ -0,0 +1,212 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "linux/bootmem.h" +#include "linux/mm.h" +#include "linux/pfn.h" +#include "asm/page.h" +#include "as-layout.h" +#include "init.h" +#include "kern.h" +#include "mem_user.h" +#include "os.h" + +static int physmem_fd = -1; + +/* Changed during early boot */ +unsigned long high_physmem; + +extern unsigned long long physmem_size; + +int __init init_maps(unsigned long physmem, unsigned long iomem, + unsigned long highmem) +{ + struct page *p, *map; + unsigned long phys_len, phys_pages, highmem_len, highmem_pages; + unsigned long iomem_len, iomem_pages, total_len, total_pages; + int i; + + phys_pages 
= physmem >> PAGE_SHIFT; + phys_len = phys_pages * sizeof(struct page); + + iomem_pages = iomem >> PAGE_SHIFT; + iomem_len = iomem_pages * sizeof(struct page); + + highmem_pages = highmem >> PAGE_SHIFT; + highmem_len = highmem_pages * sizeof(struct page); + + total_pages = phys_pages + iomem_pages + highmem_pages; + total_len = phys_len + iomem_len + highmem_len; + + map = alloc_bootmem_low_pages(total_len); + if (map == NULL) + return -ENOMEM; + + for (i = 0; i < total_pages; i++) { + p = &map[i]; + memset(p, 0, sizeof(struct page)); + SetPageReserved(p); + INIT_LIST_HEAD(&p->lru); + } + + max_mapnr = total_pages; + return 0; +} + +void map_memory(unsigned long virt, unsigned long phys, unsigned long len, + int r, int w, int x) +{ + __u64 offset; + int fd, err; + + fd = phys_mapping(phys, &offset); + err = os_map_memory((void *) virt, fd, offset, len, r, w, x); + if (err) { + if (err == -ENOMEM) + printk(KERN_ERR "try increasing the host's " + "/proc/sys/vm/max_map_count to <physical " + "memory size>/4096\n"); + panic("map_memory(0x%lx, %d, 0x%llx, %ld, %d, %d, %d) failed, " + "err = %d\n", virt, fd, offset, len, r, w, x, err); + } +} + +extern int __syscall_stub_start; + +void __init setup_physmem(unsigned long start, unsigned long reserve_end, + unsigned long len, unsigned long long highmem) +{ + unsigned long reserve = reserve_end - start; + int pfn = PFN_UP(__pa(reserve_end)); + int delta = (len - reserve) >> PAGE_SHIFT; + int err, offset, bootmap_size; + + physmem_fd = create_mem_file(len + highmem); + + offset = uml_reserved - uml_physmem; + err = os_map_memory((void *) uml_reserved, physmem_fd, offset, + len - offset, 1, 1, 1); + if (err < 0) { + printf("setup_physmem - mapping %ld bytes of memory at 0x%p " + "failed - errno = %d\n", len - offset, + (void *) uml_reserved, err); + exit(1); + } + + /* + * Special kludge - This page will be mapped in to userspace processes + * from physmem_fd, so it needs to be written out there. 
+ */ + os_seek_file(physmem_fd, __pa(&__syscall_stub_start)); + os_write_file(physmem_fd, &__syscall_stub_start, PAGE_SIZE); + + bootmap_size = init_bootmem(pfn, pfn + delta); + free_bootmem(__pa(reserve_end) + bootmap_size, + len - bootmap_size - reserve); +} + +int phys_mapping(unsigned long phys, unsigned long long *offset_out) +{ + int fd = -1; + + if (phys < physmem_size) { + fd = physmem_fd; + *offset_out = phys; + } + else if (phys < __pa(end_iomem)) { + struct iomem_region *region = iomem_regions; + + while (region != NULL) { + if ((phys >= region->phys) && + (phys < region->phys + region->size)) { + fd = region->fd; + *offset_out = phys - region->phys; + break; + } + region = region->next; + } + } + else if (phys < __pa(end_iomem) + highmem) { + fd = physmem_fd; + *offset_out = phys - iomem_size; + } + + return fd; +} + +static int __init uml_mem_setup(char *line, int *add) +{ + char *retptr; + physmem_size = memparse(line,&retptr); + return 0; +} +__uml_setup("mem=", uml_mem_setup, +"mem=<Amount of desired ram>\n" +" This controls how much \"physical\" memory the kernel allocates\n" +" for the system. The size is specified as a number followed by\n" +" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n" +" This is not related to the amount of memory in the host. It can\n" +" be more, and the excess, if it's ever used, will just be swapped out.\n" +" Example: mem=64M\n\n" +); + +extern int __init parse_iomem(char *str, int *add); + +__uml_setup("iomem=", parse_iomem, +"iomem=<name>,<file>\n" +" Configure <file> as an IO memory region named <name>.\n\n" +); + +/* + * This list is constructed in parse_iomem and addresses filled in in + * setup_iomem, both of which run during early boot. Afterwards, it's + * unchanged. 
+ */ +struct iomem_region *iomem_regions; + +/* Initialized in parse_iomem and unchanged thereafter */ +int iomem_size; + +unsigned long find_iomem(char *driver, unsigned long *len_out) +{ + struct iomem_region *region = iomem_regions; + + while (region != NULL) { + if (!strcmp(region->driver, driver)) { + *len_out = region->size; + return region->virt; + } + + region = region->next; + } + + return 0; +} + +static int setup_iomem(void) +{ + struct iomem_region *region = iomem_regions; + unsigned long iomem_start = high_physmem + PAGE_SIZE; + int err; + + while (region != NULL) { + err = os_map_memory((void *) iomem_start, region->fd, 0, + region->size, 1, 1, 0); + if (err) + printk(KERN_ERR "Mapping iomem region for driver '%s' " + "failed, errno = %d\n", region->driver, -err); + else { + region->virt = iomem_start; + region->phys = __pa(region->virt); + } + + iomem_start += region->size + PAGE_SIZE; + region = region->next; + } + + return 0; +} + +__initcall(setup_iomem); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c new file mode 100644 index 0000000..a1c6d07 --- /dev/null +++ b/arch/um/kernel/process.c @@ -0,0 +1,453 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright 2003 PathScale, Inc. + * Licensed under the GPL + */ + +#include <linux/stddef.h> +#include <linux/err.h> +#include <linux/hardirq.h> +#include <linux/gfp.h> +#include <linux/mm.h> +#include <linux/personality.h> +#include <linux/proc_fs.h> +#include <linux/ptrace.h> +#include <linux/random.h> +#include <linux/sched.h> +#include <linux/tick.h> +#include <linux/threads.h> +#include <asm/current.h> +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include "as-layout.h" +#include "kern_util.h" +#include "os.h" +#include "skas.h" +#include "tlb.h" + +/* + * This is a per-cpu array. A processor only modifies its entry and it only + * cares about its entry, so it's OK if another processor is modifying its + * entry. 
+ */ +struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { -1, NULL } }; + +static inline int external_pid(void) +{ + /* FIXME: Need to look up userspace_pid by cpu */ + return userspace_pid[0]; +} + +int pid_to_processor_id(int pid) +{ + int i; + + for (i = 0; i < ncpus; i++) { + if (cpu_tasks[i].pid == pid) + return i; + } + return -1; +} + +void free_stack(unsigned long stack, int order) +{ + free_pages(stack, order); +} + +unsigned long alloc_stack(int order, int atomic) +{ + unsigned long page; + gfp_t flags = GFP_KERNEL; + + if (atomic) + flags = GFP_ATOMIC; + page = __get_free_pages(flags, order); + + return page; +} + +int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) +{ + int pid; + + current->thread.request.u.thread.proc = fn; + current->thread.request.u.thread.arg = arg; + pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0, + &current->thread.regs, 0, NULL, NULL); + return pid; +} + +static inline void set_current(struct task_struct *task) +{ + cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task) + { external_pid(), task }); +} + +extern void arch_switch_to(struct task_struct *to); + +void *_switch_to(void *prev, void *next, void *last) +{ + struct task_struct *from = prev; + struct task_struct *to = next; + + to->thread.prev_sched = from; + set_current(to); + + do { + current->thread.saved_task = NULL; + + switch_threads(&from->thread.switch_buf, + &to->thread.switch_buf); + + arch_switch_to(current); + + if (current->thread.saved_task) + show_regs(&(current->thread.regs)); + to = current->thread.saved_task; + from = current; + } while (current->thread.saved_task); + + return current->thread.prev_sched; + +} + +void interrupt_end(void) +{ + if (need_resched()) + schedule(); + if (test_tsk_thread_flag(current, TIF_SIGPENDING)) + do_signal(); +} + +void exit_thread(void) +{ +} + +void *get_current(void) +{ + return current; +} + +/* + * This is called magically, by its address being stuffed in a jmp_buf + * and being 
longjmp-d to. + */ +void new_thread_handler(void) +{ + int (*fn)(void *), n; + void *arg; + + if (current->thread.prev_sched != NULL) + schedule_tail(current->thread.prev_sched); + current->thread.prev_sched = NULL; + + fn = current->thread.request.u.thread.proc; + arg = current->thread.request.u.thread.arg; + + /* + * The return value is 1 if the kernel thread execs a process, + * 0 if it just exits + */ + n = run_kernel_thread(fn, arg, &current->thread.exec_buf); + if (n == 1) { + /* Handle any immediate reschedules or signals */ + interrupt_end(); + userspace(&current->thread.regs.regs); + } + else do_exit(0); +} + +/* Called magically, see new_thread_handler above */ +void fork_handler(void) +{ + force_flush_all(); + + schedule_tail(current->thread.prev_sched); + + /* + * XXX: if interrupt_end() calls schedule, this call to + * arch_switch_to isn't needed. We could want to apply this to + * improve performance. -bb + */ + arch_switch_to(current); + + current->thread.prev_sched = NULL; + + /* Handle any immediate reschedules or signals */ + interrupt_end(); + + userspace(&current->thread.regs.regs); +} + +int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, + unsigned long stack_top, struct task_struct * p, + struct pt_regs *regs) +{ + void (*handler)(void); + int ret = 0; + + p->thread = (struct thread_struct) INIT_THREAD; + + if (current->thread.forking) { + memcpy(&p->thread.regs.regs, &regs->regs, + sizeof(p->thread.regs.regs)); + REGS_SET_SYSCALL_RETURN(p->thread.regs.regs.gp, 0); + if (sp != 0) + REGS_SP(p->thread.regs.regs.gp) = sp; + + handler = fork_handler; + + arch_copy_thread(&current->thread.arch, &p->thread.arch); + } + else { + get_safe_registers(p->thread.regs.regs.gp); + p->thread.request.u.thread = current->thread.request.u.thread; + handler = new_thread_handler; + } + + new_thread(task_stack_page(p), &p->thread.switch_buf, handler); + + if (current->thread.forking) { + clear_flushed_tls(p); + + /* + * Set a new TLS for the child thread? 
+ */ + if (clone_flags & CLONE_SETTLS) + ret = arch_copy_tls(p); + } + + return ret; +} + +void initial_thread_cb(void (*proc)(void *), void *arg) +{ + int save_kmalloc_ok = kmalloc_ok; + + kmalloc_ok = 0; + initial_thread_cb_skas(proc, arg); + kmalloc_ok = save_kmalloc_ok; +} + +void default_idle(void) +{ + unsigned long long nsecs; + + while (1) { + /* endless idle loop with no priority at all */ + + /* + * although we are an idle CPU, we do not want to + * get into the scheduler unnecessarily. + */ + if (need_resched()) + schedule(); + + tick_nohz_stop_sched_tick(1); + nsecs = disable_timer(); + idle_sleep(nsecs); + tick_nohz_restart_sched_tick(); + } +} + +void cpu_idle(void) +{ + cpu_tasks[current_thread_info()->cpu].pid = os_getpid(); + default_idle(); +} + +int __cant_sleep(void) { + return in_atomic() || irqs_disabled() || in_interrupt(); + /* Is in_interrupt() really needed? */ +} + +int user_context(unsigned long sp) +{ + unsigned long stack; + + stack = sp & (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER); + return stack != (unsigned long) current_thread_info(); +} + +extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end; + +void do_uml_exitcalls(void) +{ + exitcall_t *call; + + call = &__uml_exitcall_end; + while (--call >= &__uml_exitcall_begin) + (*call)(); +} + +char *uml_strdup(const char *string) +{ + return kstrdup(string, GFP_KERNEL); +} + +int copy_to_user_proc(void __user *to, void *from, int size) +{ + return copy_to_user(to, from, size); +} + +int copy_from_user_proc(void *to, void __user *from, int size) +{ + return copy_from_user(to, from, size); +} + +int clear_user_proc(void __user *buf, int size) +{ + return clear_user(buf, size); +} + +int strlen_user_proc(char __user *str) +{ + return strlen_user(str); +} + +int smp_sigio_handler(void) +{ +#ifdef CONFIG_SMP + int cpu = current_thread_info()->cpu; + IPI_handler(cpu); + if (cpu != 0) + return 1; +#endif + return 0; +} + +int cpu(void) +{ + return current_thread_info()->cpu; +} + +static 
atomic_t using_sysemu = ATOMIC_INIT(0); +int sysemu_supported; + +void set_using_sysemu(int value) +{ + if (value > sysemu_supported) + return; + atomic_set(&using_sysemu, value); +} + +int get_using_sysemu(void) +{ + return atomic_read(&using_sysemu); +} + +static int proc_read_sysemu(char *buf, char **start, off_t offset, int size,int *eof, void *data) +{ + if (snprintf(buf, size, "%d\n", get_using_sysemu()) < size) + /* No overflow */ + *eof = 1; + + return strlen(buf); +} + +static int proc_write_sysemu(struct file *file,const char __user *buf, unsigned long count,void *data) +{ + char tmp[2]; + + if (copy_from_user(tmp, buf, 1)) + return -EFAULT; + + if (tmp[0] >= '0' && tmp[0] <= '2') + set_using_sysemu(tmp[0] - '0'); + /* We use the first char, but pretend to write everything */ + return count; +} + +int __init make_proc_sysemu(void) +{ + struct proc_dir_entry *ent; + if (!sysemu_supported) + return 0; + + ent = create_proc_entry("sysemu", 0600, NULL); + + if (ent == NULL) + { + printk(KERN_WARNING "Failed to register /proc/sysemu\n"); + return 0; + } + + ent->read_proc = proc_read_sysemu; + ent->write_proc = proc_write_sysemu; + + return 0; +} + +late_initcall(make_proc_sysemu); + +int singlestepping(void * t) +{ + struct task_struct *task = t ? t : current; + + if (!(task->ptrace & PT_DTRACE)) + return 0; + + if (task->thread.singlestep_syscall) + return 1; + + return 2; +} + +/* + * Only x86 and x86_64 have an arch_align_stack(). + * All other arches have "#define arch_align_stack(x) (x)" + * in their asm/system.h + * As this is included in UML from asm-um/system-generic.h, + * we can use it to behave as the subarch does. 
+ */ +#ifndef arch_align_stack +unsigned long arch_align_stack(unsigned long sp) +{ + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + sp -= get_random_int() % 8192; + return sp & ~0xf; +} +#endif + +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long stack_page, sp, ip; + bool seen_sched = 0; + + if ((p == NULL) || (p == current) || (p->state == TASK_RUNNING)) + return 0; + + stack_page = (unsigned long) task_stack_page(p); + /* Bail if the process has no kernel stack for some reason */ + if (stack_page == 0) + return 0; + + sp = p->thread.switch_buf->JB_SP; + /* + * Bail if the stack pointer is below the bottom of the kernel + * stack for some reason + */ + if (sp < stack_page) + return 0; + + while (sp < stack_page + THREAD_SIZE) { + ip = *((unsigned long *) sp); + if (in_sched_functions(ip)) + /* Ignore everything until we're above the scheduler */ + seen_sched = 1; + else if (kernel_text_address(ip) && seen_sched) + return ip; + + sp += sizeof(unsigned long); + } + + return 0; +} + +int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu) +{ + int cpu = current_thread_info()->cpu; + + return save_fp_registers(userspace_pid[cpu], (unsigned long *) fpu); +} + diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c new file mode 100644 index 0000000..15e8b7c --- /dev/null +++ b/arch/um/kernel/ptrace.c @@ -0,0 +1,294 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "linux/audit.h" +#include "linux/ptrace.h" +#include "linux/sched.h" +#include "asm/uaccess.h" +#ifdef CONFIG_PROC_MM +#include "proc_mm.h" +#endif +#include "skas_ptrace.h" + +static inline void set_singlestepping(struct task_struct *child, int on) +{ + if (on) + child->ptrace |= PT_DTRACE; + else + child->ptrace &= ~PT_DTRACE; + child->thread.singlestep_syscall = 0; + +#ifdef SUBARCH_SET_SINGLESTEPPING + SUBARCH_SET_SINGLESTEPPING(child, on); +#endif +} + +/* + * Called 
by kernel/ptrace.c when detaching.. + */ +void ptrace_disable(struct task_struct *child) +{ + set_singlestepping(child,0); +} + +extern int peek_user(struct task_struct * child, long addr, long data); +extern int poke_user(struct task_struct * child, long addr, long data); + +long arch_ptrace(struct task_struct *child, long request, long addr, long data) +{ + int i, ret; + unsigned long __user *p = (void __user *)(unsigned long)data; + + switch (request) { + /* read word at location addr. */ + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: + ret = generic_ptrace_peekdata(child, addr, data); + break; + + /* read the word at location addr in the USER area. */ + case PTRACE_PEEKUSR: + ret = peek_user(child, addr, data); + break; + + /* write the word at location addr. */ + case PTRACE_POKETEXT: + case PTRACE_POKEDATA: + ret = generic_ptrace_pokedata(child, addr, data); + break; + + /* write the word at location addr in the USER area */ + case PTRACE_POKEUSR: + ret = poke_user(child, addr, data); + break; + + /* continue and stop at next (return from) syscall */ + case PTRACE_SYSCALL: + /* restart after signal. */ + case PTRACE_CONT: { + ret = -EIO; + if (!valid_signal(data)) + break; + + set_singlestepping(child, 0); + if (request == PTRACE_SYSCALL) + set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + else clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + child->exit_code = data; + wake_up_process(child); + ret = 0; + break; + } + +/* + * make the child exit. Best I can do is send it a sigkill. + * perhaps it should be put in the status that it wants to + * exit. + */ + case PTRACE_KILL: { + ret = 0; + if (child->exit_state == EXIT_ZOMBIE) /* already dead */ + break; + + set_singlestepping(child, 0); + child->exit_code = SIGKILL; + wake_up_process(child); + break; + } + + case PTRACE_SINGLESTEP: { /* set the trap flag. 
*/ + ret = -EIO; + if (!valid_signal(data)) + break; + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); + set_singlestepping(child, 1); + child->exit_code = data; + /* give it a chance to run. */ + wake_up_process(child); + ret = 0; + break; + } + +#ifdef PTRACE_GETREGS + case PTRACE_GETREGS: { /* Get all gp regs from the child. */ + if (!access_ok(VERIFY_WRITE, p, MAX_REG_OFFSET)) { + ret = -EIO; + break; + } + for ( i = 0; i < MAX_REG_OFFSET; i += sizeof(long) ) { + __put_user(getreg(child, i), p); + p++; + } + ret = 0; + break; + } +#endif +#ifdef PTRACE_SETREGS + case PTRACE_SETREGS: { /* Set all gp regs in the child. */ + unsigned long tmp = 0; + if (!access_ok(VERIFY_READ, p, MAX_REG_OFFSET)) { + ret = -EIO; + break; + } + for ( i = 0; i < MAX_REG_OFFSET; i += sizeof(long) ) { + __get_user(tmp, p); + putreg(child, i, tmp); + p++; + } + ret = 0; + break; + } +#endif +#ifdef PTRACE_GETFPREGS + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + ret = get_fpregs((struct user_i387_struct __user *) data, + child); + break; +#endif +#ifdef PTRACE_SETFPREGS + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + ret = set_fpregs((struct user_i387_struct __user *) data, + child); + break; +#endif + case PTRACE_GET_THREAD_AREA: + ret = ptrace_get_thread_area(child, addr, + (struct user_desc __user *) data); + break; + + case PTRACE_SET_THREAD_AREA: + ret = ptrace_set_thread_area(child, addr, + (struct user_desc __user *) data); + break; + + case PTRACE_FAULTINFO: { + /* + * Take the info from thread->arch->faultinfo, + * but transfer max. sizeof(struct ptrace_faultinfo). + * On i386, ptrace_faultinfo is smaller! 
+ */ + ret = copy_to_user(p, &child->thread.arch.faultinfo, + sizeof(struct ptrace_faultinfo)); + break; + } + +#ifdef PTRACE_LDT + case PTRACE_LDT: { + struct ptrace_ldt ldt; + + if (copy_from_user(&ldt, p, sizeof(ldt))) { + ret = -EIO; + break; + } + + /* + * This one is confusing, so just punt and return -EIO for + * now + */ + ret = -EIO; + break; + } +#endif +#ifdef CONFIG_PROC_MM + case PTRACE_SWITCH_MM: { + struct mm_struct *old = child->mm; + struct mm_struct *new = proc_mm_get_mm(data); + + if (IS_ERR(new)) { + ret = PTR_ERR(new); + break; + } + + atomic_inc(&new->mm_users); + child->mm = new; + child->active_mm = new; + mmput(old); + ret = 0; + break; + } +#endif +#ifdef PTRACE_ARCH_PRCTL + case PTRACE_ARCH_PRCTL: + /* XXX Calls ptrace on the host - needs some SMP thinking */ + ret = arch_prctl(child, data, (void *) addr); + break; +#endif + default: + ret = ptrace_request(child, request, addr, data); + if (ret == -EIO) + ret = subarch_ptrace(child, request, addr, data); + break; + } + + return ret; +} + +static void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs, + int error_code) +{ + struct siginfo info; + + memset(&info, 0, sizeof(info)); + info.si_signo = SIGTRAP; + info.si_code = TRAP_BRKPT; + + /* User-mode eip? */ + info.si_addr = UPT_IS_USER(regs) ? 
(void __user *) UPT_IP(regs) : NULL; + + /* Send us the fake SIGTRAP */ + force_sig_info(SIGTRAP, &info, tsk); +} + +/* + * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and + * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check + */ +void syscall_trace(struct uml_pt_regs *regs, int entryexit) +{ + int is_singlestep = (current->ptrace & PT_DTRACE) && entryexit; + int tracesysgood; + + if (unlikely(current->audit_context)) { + if (!entryexit) + audit_syscall_entry(HOST_AUDIT_ARCH, + UPT_SYSCALL_NR(regs), + UPT_SYSCALL_ARG1(regs), + UPT_SYSCALL_ARG2(regs), + UPT_SYSCALL_ARG3(regs), + UPT_SYSCALL_ARG4(regs)); + else audit_syscall_exit(AUDITSC_RESULT(UPT_SYSCALL_RET(regs)), + UPT_SYSCALL_RET(regs)); + } + + /* Fake a debug trap */ + if (is_singlestep) + send_sigtrap(current, regs, 0); + + if (!test_thread_flag(TIF_SYSCALL_TRACE)) + return; + + if (!(current->ptrace & PT_PTRACED)) + return; + + /* + * the 0x80 provides a way for the tracing parent to distinguish + * between a syscall stop and SIGTRAP delivery + */ + tracesysgood = (current->ptrace & PT_TRACESYSGOOD); + ptrace_notify(SIGTRAP | (tracesysgood ? 0x80 : 0)); + + if (entryexit) /* force do_signal() --> is_syscall() */ + set_thread_flag(TIF_SIGPENDING); + + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. strace only continues with a signal if the + * stopping signal is not SIGTRAP. 
-brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } +} diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c new file mode 100644 index 0000000..00197d3 --- /dev/null +++ b/arch/um/kernel/reboot.c @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "linux/sched.h" +#include "kern_util.h" +#include "os.h" +#include "skas.h" + +void (*pm_power_off)(void); + +static void kill_off_processes(void) +{ + if (proc_mm) + /* + * FIXME: need to loop over userspace_pids + */ + os_kill_ptraced_process(userspace_pid[0], 1); + else { + struct task_struct *p; + int pid, me; + + me = os_getpid(); + for_each_process(p) { + if (p->mm == NULL) + continue; + + pid = p->mm->context.id.u.pid; + os_kill_ptraced_process(pid, 1); + } + } +} + +void uml_cleanup(void) +{ + kmalloc_ok = 0; + do_uml_exitcalls(); + kill_off_processes(); +} + +void machine_restart(char * __unused) +{ + uml_cleanup(); + reboot_skas(); +} + +void machine_power_off(void) +{ + uml_cleanup(); + halt_skas(); +} + +void machine_halt(void) +{ + machine_power_off(); +} diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c new file mode 100644 index 0000000..2b272b6 --- /dev/null +++ b/arch/um/kernel/sigio.c @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) + * Licensed under the GPL + */ + +#include <linux/interrupt.h> +#include "irq_kern.h" +#include "os.h" +#include "sigio.h" + +/* Protected by sigio_lock() called from write_sigio_workaround */ +static int sigio_irq_fd = -1; + +static irqreturn_t sigio_interrupt(int irq, void *data) +{ + char c; + + os_read_file(sigio_irq_fd, &c, sizeof(c)); + reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ); + return IRQ_HANDLED; +} + +int write_sigio_irq(int fd) +{ + int err; + + err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt, + IRQF_DISABLED|IRQF_SAMPLE_RANDOM, 
"write sigio", + NULL); + if (err) { + printk(KERN_ERR "write_sigio_irq : um_request_irq failed, " + "err = %d\n", err); + return -1; + } + sigio_irq_fd = fd; + return 0; +} + +/* These are called from os-Linux/sigio.c to protect its pollfds arrays. */ +static DEFINE_SPINLOCK(sigio_spinlock); + +void sigio_lock(void) +{ + spin_lock(&sigio_spinlock); +} + +void sigio_unlock(void) +{ + spin_unlock(&sigio_spinlock); +} diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c new file mode 100644 index 0000000..b5c094c --- /dev/null +++ b/arch/um/kernel/signal.c @@ -0,0 +1,182 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/module.h> +#include <linux/ptrace.h> +#include <linux/sched.h> +#include <asm/siginfo.h> +#include <asm/signal.h> +#include <asm/unistd.h> +#include "frame_kern.h" +#include "kern_util.h" +#include <sysdep/sigcontext.h> + +EXPORT_SYMBOL(block_signals); +EXPORT_SYMBOL(unblock_signals); + +#define _S(nr) (1<<((nr)-1)) + +#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP))) + +/* + * OK, we're invoking a handler + */ +static int handle_signal(struct pt_regs *regs, unsigned long signr, + struct k_sigaction *ka, siginfo_t *info, + sigset_t *oldset) +{ + unsigned long sp; + int err; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + /* Did we come from a system call? */ + if (PT_REGS_SYSCALL_NR(regs) >= 0) { + /* If so, check system call restarting.. 
*/ + switch (PT_REGS_SYSCALL_RET(regs)) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + PT_REGS_SYSCALL_RET(regs) = -EINTR; + break; + + case -ERESTARTSYS: + if (!(ka->sa.sa_flags & SA_RESTART)) { + PT_REGS_SYSCALL_RET(regs) = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + PT_REGS_RESTART_SYSCALL(regs); + PT_REGS_ORIG_SYSCALL(regs) = PT_REGS_SYSCALL_NR(regs); + break; + } + } + + sp = PT_REGS_SP(regs); + if ((ka->sa.sa_flags & SA_ONSTACK) && (sas_ss_flags(sp) == 0)) + sp = current->sas_ss_sp + current->sas_ss_size; + +#ifdef CONFIG_ARCH_HAS_SC_SIGNALS + if (!(ka->sa.sa_flags & SA_SIGINFO)) + err = setup_signal_stack_sc(sp, signr, ka, regs, oldset); + else +#endif + err = setup_signal_stack_si(sp, signr, ka, regs, info, oldset); + + if (err) { + spin_lock_irq(&current->sighand->siglock); + current->blocked = *oldset; + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); + force_sigsegv(signr, current); + } else { + spin_lock_irq(&current->sighand->siglock); + sigorsets(&current->blocked, &current->blocked, + &ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(&current->blocked, signr); + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); + } + + return err; +} + +static int kern_do_signal(struct pt_regs *regs) +{ + struct k_sigaction ka_copy; + siginfo_t info; + sigset_t *oldset; + int sig, handled_sig = 0; + + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + oldset = &current->saved_sigmask; + else + oldset = &current->blocked; + + while ((sig = get_signal_to_deliver(&info, &ka_copy, regs, NULL)) > 0) { + handled_sig = 1; + /* Whee! Actually deliver the signal. 
*/ + if (!handle_signal(regs, sig, &ka_copy, &info, oldset)) { + /* + * a signal was successfully delivered; the saved + * sigmask will have been stored in the signal frame, + * and will be restored by sigreturn, so we can simply + * clear the TIF_RESTORE_SIGMASK flag + */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + clear_thread_flag(TIF_RESTORE_SIGMASK); + break; + } + } + + /* Did we come from a system call? */ + if (!handled_sig && (PT_REGS_SYSCALL_NR(regs) >= 0)) { + /* Restart the system call - no handlers present */ + switch (PT_REGS_SYSCALL_RET(regs)) { + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + PT_REGS_ORIG_SYSCALL(regs) = PT_REGS_SYSCALL_NR(regs); + PT_REGS_RESTART_SYSCALL(regs); + break; + case -ERESTART_RESTARTBLOCK: + PT_REGS_ORIG_SYSCALL(regs) = __NR_restart_syscall; + PT_REGS_RESTART_SYSCALL(regs); + break; + } + } + + /* + * This closes a way to execute a system call on the host. If + * you set a breakpoint on a system call instruction and singlestep + * from it, the tracing thread used to PTRACE_SINGLESTEP the process + * rather than PTRACE_SYSCALL it, allowing the system call to execute + * on the host. The tracing thread will check this flag and + * PTRACE_SYSCALL if necessary. + */ + if (current->ptrace & PT_DTRACE) + current->thread.singlestep_syscall = + is_syscall(PT_REGS_IP(&current->thread.regs)); + + /* + * if there's no signal to deliver, we just put the saved sigmask + * back + */ + if (!handled_sig && test_thread_flag(TIF_RESTORE_SIGMASK)) { + clear_thread_flag(TIF_RESTORE_SIGMASK); + sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); + } + return handled_sig; +} + +int do_signal(void) +{ + return kern_do_signal(&current->thread.regs); +} + +/* + * Atomically swap in the new signal mask, and wait for a signal. 
+ */ +long sys_sigsuspend(int history0, int history1, old_sigset_t mask) +{ + mask &= _BLOCKABLE; + spin_lock_irq(&current->sighand->siglock); + current->saved_sigmask = current->blocked; + siginitset(&current->blocked, mask); + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); + + current->state = TASK_INTERRUPTIBLE; + schedule(); + set_thread_flag(TIF_RESTORE_SIGMASK); + return -ERESTARTNOHAND; +} + +long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss) +{ + return do_sigaltstack(uss, uoss, PT_REGS_SP(&current->thread.regs)); +} diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile new file mode 100644 index 0000000..0b76d88 --- /dev/null +++ b/arch/um/kernel/skas/Makefile @@ -0,0 +1,15 @@ +# +# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) +# Licensed under the GPL +# + +obj-y := clone.o mmu.o process.o syscall.o uaccess.o + +# clone.o is in the stub, so it can't be built with profiling +# GCC hardened also auto-enables -fpic, but we need %ebx so it can't work -> +# disable it + +CFLAGS_clone.o := $(CFLAGS_NO_HARDENING) +UNPROFILE_OBJS := clone.o + +include arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c new file mode 100644 index 0000000..2c8583c --- /dev/null +++ b/arch/um/kernel/skas/clone.c @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <signal.h> +#include <sched.h> +#include <asm/unistd.h> +#include <sys/time.h> +#include "as-layout.h" +#include "kern_constants.h" +#include "ptrace_user.h" +#include "stub-data.h" +#include "sysdep/stub.h" + +/* + * This is in a separate file because it needs to be compiled with any + * extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled + * + * Use UM_KERN_PAGE_SIZE instead of PAGE_SIZE because that calls getpagesize + * on some systems. 
+ */ + +void __attribute__ ((__section__ (".__syscall_stub"))) +stub_clone_handler(void) +{ + struct stub_data *data = (struct stub_data *) STUB_DATA; + long err; + + err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD, + STUB_DATA + UM_KERN_PAGE_SIZE / 2 - sizeof(void *)); + if (err != 0) + goto out; + + err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); + if (err) + goto out; + + err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL, + (long) &data->timer, 0); + if (err) + goto out; + + remap_stack(data->fd, data->offset); + goto done; + + out: + /* + * save current result. + * Parent: pid; + * child: retcode of mmap already saved and it jumps around this + * assignment + */ + data->err = err; + done: + trap_myself(); +} diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c new file mode 100644 index 0000000..0cd9a7a --- /dev/null +++ b/arch/um/kernel/skas/mmu.c @@ -0,0 +1,191 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "linux/mm.h" +#include "linux/sched.h" +#include "asm/pgalloc.h" +#include "asm/pgtable.h" +#include "as-layout.h" +#include "os.h" +#include "skas.h" + +extern int __syscall_stub_start; + +static int init_stub_pte(struct mm_struct *mm, unsigned long proc, + unsigned long kernel) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_offset(mm, proc); + pud = pud_alloc(mm, pgd, proc); + if (!pud) + goto out; + + pmd = pmd_alloc(mm, pud, proc); + if (!pmd) + goto out_pmd; + + pte = pte_alloc_map(mm, pmd, proc); + if (!pte) + goto out_pte; + + *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT)); + *pte = pte_mkread(*pte); + return 0; + + out_pmd: + pud_free(mm, pud); + out_pte: + pmd_free(mm, pmd); + out: + return -ENOMEM; +} + +int init_new_context(struct task_struct *task, struct mm_struct *mm) +{ + struct mm_context *from_mm = NULL; + struct mm_context *to_mm = &mm->context; + unsigned long stack = 0; + 
int ret = -ENOMEM; + + if (skas_needs_stub) { + stack = get_zeroed_page(GFP_KERNEL); + if (stack == 0) + goto out; + } + + to_mm->id.stack = stack; + if (current->mm != NULL && current->mm != &init_mm) + from_mm = &current->mm->context; + + if (proc_mm) { + ret = new_mm(stack); + if (ret < 0) { + printk(KERN_ERR "init_new_context_skas - " + "new_mm failed, errno = %d\n", ret); + goto out_free; + } + to_mm->id.u.mm_fd = ret; + } + else { + if (from_mm) + to_mm->id.u.pid = copy_context_skas0(stack, + from_mm->id.u.pid); + else to_mm->id.u.pid = start_userspace(stack); + + if (to_mm->id.u.pid < 0) { + ret = to_mm->id.u.pid; + goto out_free; + } + } + + ret = init_new_ldt(to_mm, from_mm); + if (ret < 0) { + printk(KERN_ERR "init_new_context_skas - init_ldt" + " failed, errno = %d\n", ret); + goto out_free; + } + + to_mm->stub_pages = NULL; + + return 0; + + out_free: + if (to_mm->id.stack != 0) + free_page(to_mm->id.stack); + out: + return ret; +} + +void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) +{ + struct page **pages; + int err, ret; + + if (!skas_needs_stub) + return; + + ret = init_stub_pte(mm, STUB_CODE, + (unsigned long) &__syscall_stub_start); + if (ret) + goto out; + + ret = init_stub_pte(mm, STUB_DATA, mm->context.id.stack); + if (ret) + goto out; + + pages = kmalloc(2 * sizeof(struct page *), GFP_KERNEL); + if (pages == NULL) { + printk(KERN_ERR "arch_dup_mmap failed to allocate 2 page " + "pointers\n"); + goto out; + } + + pages[0] = virt_to_page(&__syscall_stub_start); + pages[1] = virt_to_page(mm->context.id.stack); + mm->context.stub_pages = pages; + + /* dup_mmap already holds mmap_sem */ + err = install_special_mapping(mm, STUB_START, STUB_END - STUB_START, + VM_READ | VM_MAYREAD | VM_EXEC | + VM_MAYEXEC | VM_DONTCOPY, pages); + if (err) { + printk(KERN_ERR "install_special_mapping returned %d\n", err); + goto out_free; + } + return; + +out_free: + kfree(pages); +out: + force_sigsegv(SIGSEGV, current); +} + +void arch_exit_mmap(struct 
mm_struct *mm) +{ + pte_t *pte; + + if (mm->context.stub_pages != NULL) + kfree(mm->context.stub_pages); + pte = virt_to_pte(mm, STUB_CODE); + if (pte != NULL) + pte_clear(mm, STUB_CODE, pte); + + pte = virt_to_pte(mm, STUB_DATA); + if (pte == NULL) + return; + + pte_clear(mm, STUB_DATA, pte); +} + +void destroy_context(struct mm_struct *mm) +{ + struct mm_context *mmu = &mm->context; + + if (proc_mm) + os_close_file(mmu->id.u.mm_fd); + else { + /* + * If init_new_context wasn't called, this will be + * zero, resulting in a kill(0), which will result in the + * whole UML suddenly dying. Also, cover negative and + * 1 cases, since they shouldn't happen either. + */ + if (mmu->id.u.pid < 2) { + printk(KERN_ERR "corrupt mm_context - pid = %d\n", + mmu->id.u.pid); + return; + } + os_kill_ptraced_process(mmu->id.u.pid, 1); + } + + if (skas_needs_stub) + free_page(mmu->id.stack); + + free_ldt(mmu); +} diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c new file mode 100644 index 0000000..2e9852c --- /dev/null +++ b/arch/um/kernel/skas/process.c @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "linux/init.h" +#include "linux/sched.h" +#include "as-layout.h" +#include "kern.h" +#include "os.h" +#include "skas.h" + +int new_mm(unsigned long stack) +{ + int fd, err; + + fd = os_open_file("/proc/mm", of_cloexec(of_write(OPENFLAGS())), 0); + if (fd < 0) + return fd; + + if (skas_needs_stub) { + err = map_stub_pages(fd, STUB_CODE, STUB_DATA, stack); + if (err) { + os_close_file(fd); + return err; + } + } + + return fd; +} + +extern void start_kernel(void); + +static int __init start_kernel_proc(void *unused) +{ + int pid; + + block_signals(); + pid = os_getpid(); + + cpu_tasks[0].pid = pid; + cpu_tasks[0].task = current; +#ifdef CONFIG_SMP + cpu_online_map = cpumask_of_cpu(0); +#endif + start_kernel(); + return 0; +} + +extern int userspace_pid[]; + +extern char 
cpu0_irqstack[]; + +int __init start_uml(void) +{ + stack_protections((unsigned long) &cpu0_irqstack); + set_sigstack(cpu0_irqstack, THREAD_SIZE); + if (proc_mm) { + userspace_pid[0] = start_userspace(0); + if (userspace_pid[0] < 0) { + printf("start_uml - start_userspace returned %d\n", + userspace_pid[0]); + exit(1); + } + } + + init_new_thread_signals(); + + init_task.thread.request.u.thread.proc = start_kernel_proc; + init_task.thread.request.u.thread.arg = NULL; + return start_idle_thread(task_stack_page(&init_task), + &init_task.thread.switch_buf); +} + +unsigned long current_stub_stack(void) +{ + if (current->mm == NULL) + return 0; + + return current->mm->context.id.stack; +} diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c new file mode 100644 index 0000000..4e3b820 --- /dev/null +++ b/arch/um/kernel/skas/syscall.c @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "linux/kernel.h" +#include "linux/ptrace.h" +#include "kern_util.h" +#include "sysdep/ptrace.h" +#include "sysdep/syscalls.h" + +extern int syscall_table_size; +#define NR_syscalls (syscall_table_size / sizeof(void *)) + +void handle_syscall(struct uml_pt_regs *r) +{ + struct pt_regs *regs = container_of(r, struct pt_regs, regs); + long result; + int syscall; + + syscall_trace(r, 0); + + /* + * This should go in the declaration of syscall, but when I do that, + * strace -f -c bash -c 'ls ; ls' breaks, sometimes not tracing + * children at all, sometimes hanging when bash doesn't see the first + * ls exit. + * The assembly looks functionally the same to me. This is + * gcc version 4.0.1 20050727 (Red Hat 4.0.1-5) + * in case it's a compiler bug. 
+ */ + syscall = UPT_SYSCALL_NR(r); + if ((syscall >= NR_syscalls) || (syscall < 0)) + result = -ENOSYS; + else result = EXECUTE_SYSCALL(syscall, regs); + + REGS_SET_SYSCALL_RETURN(r->gp, result); + + syscall_trace(r, 1); +} diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c new file mode 100644 index 0000000..e22c969 --- /dev/null +++ b/arch/um/kernel/skas/uaccess.c @@ -0,0 +1,253 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/err.h> +#include <linux/highmem.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <asm/current.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include "kern_util.h" +#include "os.h" + +pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + if (mm == NULL) + return NULL; + + pgd = pgd_offset(mm, addr); + if (!pgd_present(*pgd)) + return NULL; + + pud = pud_offset(pgd, addr); + if (!pud_present(*pud)) + return NULL; + + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) + return NULL; + + return pte_offset_kernel(pmd, addr); +} + +static pte_t *maybe_map(unsigned long virt, int is_write) +{ + pte_t *pte = virt_to_pte(current->mm, virt); + int err, dummy_code; + + if ((pte == NULL) || !pte_present(*pte) || + (is_write && !pte_write(*pte))) { + err = handle_page_fault(virt, 0, is_write, 1, &dummy_code); + if (err) + return NULL; + pte = virt_to_pte(current->mm, virt); + } + if (!pte_present(*pte)) + pte = NULL; + + return pte; +} + +static int do_op_one_page(unsigned long addr, int len, int is_write, + int (*op)(unsigned long addr, int len, void *arg), void *arg) +{ + jmp_buf buf; + struct page *page; + pte_t *pte; + int n, faulted; + + pte = maybe_map(addr, is_write); + if (pte == NULL) + return -1; + + page = pte_page(*pte); + addr = (unsigned long) kmap_atomic(page, KM_UML_USERCOPY) + + (addr & ~PAGE_MASK); + + current->thread.fault_catcher = &buf; + + faulted 
= UML_SETJMP(&buf); + if (faulted == 0) + n = (*op)(addr, len, arg); + else + n = -1; + + current->thread.fault_catcher = NULL; + + kunmap_atomic(page, KM_UML_USERCOPY); + + return n; +} + +static int buffer_op(unsigned long addr, int len, int is_write, + int (*op)(unsigned long, int, void *), void *arg) +{ + int size, remain, n; + + size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len); + remain = len; + + n = do_op_one_page(addr, size, is_write, op, arg); + if (n != 0) { + remain = (n < 0 ? remain : 0); + goto out; + } + + addr += size; + remain -= size; + if (remain == 0) + goto out; + + while (addr < ((addr + remain) & PAGE_MASK)) { + n = do_op_one_page(addr, PAGE_SIZE, is_write, op, arg); + if (n != 0) { + remain = (n < 0 ? remain : 0); + goto out; + } + + addr += PAGE_SIZE; + remain -= PAGE_SIZE; + } + if (remain == 0) + goto out; + + n = do_op_one_page(addr, remain, is_write, op, arg); + if (n != 0) { + remain = (n < 0 ? remain : 0); + goto out; + } + + return 0; + out: + return remain; +} + +static int copy_chunk_from_user(unsigned long from, int len, void *arg) +{ + unsigned long *to_ptr = arg, to = *to_ptr; + + memcpy((void *) to, (void *) from, len); + *to_ptr += len; + return 0; +} + +int copy_from_user(void *to, const void __user *from, int n) +{ + if (segment_eq(get_fs(), KERNEL_DS)) { + memcpy(to, (__force void*)from, n); + return 0; + } + + return access_ok(VERIFY_READ, from, n) ? + buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to): + n; +} + +static int copy_chunk_to_user(unsigned long to, int len, void *arg) +{ + unsigned long *from_ptr = arg, from = *from_ptr; + + memcpy((void *) to, (void *) from, len); + *from_ptr += len; + return 0; +} + +int copy_to_user(void __user *to, const void *from, int n) +{ + if (segment_eq(get_fs(), KERNEL_DS)) { + memcpy((__force void *) to, from, n); + return 0; + } + + return access_ok(VERIFY_WRITE, to, n) ? 
+ buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) : + n; +} + +static int strncpy_chunk_from_user(unsigned long from, int len, void *arg) +{ + char **to_ptr = arg, *to = *to_ptr; + int n; + + strncpy(to, (void *) from, len); + n = strnlen(to, len); + *to_ptr += n; + + if (n < len) + return 1; + return 0; +} + +int strncpy_from_user(char *dst, const char __user *src, int count) +{ + int n; + char *ptr = dst; + + if (segment_eq(get_fs(), KERNEL_DS)) { + strncpy(dst, (__force void *) src, count); + return strnlen(dst, count); + } + + if (!access_ok(VERIFY_READ, src, 1)) + return -EFAULT; + + n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user, + &ptr); + if (n != 0) + return -EFAULT; + return strnlen(dst, count); +} + +static int clear_chunk(unsigned long addr, int len, void *unused) +{ + memset((void *) addr, 0, len); + return 0; +} + +int __clear_user(void __user *mem, int len) +{ + return buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL); +} + +int clear_user(void __user *mem, int len) +{ + if (segment_eq(get_fs(), KERNEL_DS)) { + memset((__force void*)mem, 0, len); + return 0; + } + + return access_ok(VERIFY_WRITE, mem, len) ? 
+ buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len; +} + +static int strnlen_chunk(unsigned long str, int len, void *arg) +{ + int *len_ptr = arg, n; + + n = strnlen((void *) str, len); + *len_ptr += n; + + if (n < len) + return 1; + return 0; +} + +int strnlen_user(const void __user *str, int len) +{ + int count = 0, n; + + if (segment_eq(get_fs(), KERNEL_DS)) + return strnlen((__force char*)str, len) + 1; + + n = buffer_op((unsigned long) str, len, 0, strnlen_chunk, &count); + if (n == 0) + return count + 1; + return -EFAULT; +} diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c new file mode 100644 index 0000000..0457721 --- /dev/null +++ b/arch/um/kernel/smp.c @@ -0,0 +1,249 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "linux/percpu.h" +#include "asm/pgalloc.h" +#include "asm/tlb.h" + +/* For some reason, mmu_gathers are referenced when CONFIG_SMP is off. */ +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + +#ifdef CONFIG_SMP + +#include "linux/sched.h" +#include "linux/module.h" +#include "linux/threads.h" +#include "linux/interrupt.h" +#include "linux/err.h" +#include "linux/hardirq.h" +#include "asm/smp.h" +#include "asm/processor.h" +#include "asm/spinlock.h" +#include "kern.h" +#include "irq_user.h" +#include "os.h" + +/* CPU online map, set by smp_boot_cpus */ +cpumask_t cpu_online_map = CPU_MASK_NONE; +cpumask_t cpu_possible_map = CPU_MASK_NONE; + +EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL(cpu_possible_map); + +/* Per CPU bogomips and other parameters + * The only piece used here is the ipi pipe, which is set before SMP is + * started and never changed. 
+ */ +struct cpuinfo_um cpu_data[NR_CPUS]; + +/* A statistic, can be a little off */ +int num_reschedules_sent = 0; + +/* Not changed after boot */ +struct task_struct *idle_threads[NR_CPUS]; + +void smp_send_reschedule(int cpu) +{ + os_write_file(cpu_data[cpu].ipi_pipe[1], "R", 1); + num_reschedules_sent++; +} + +void smp_send_stop(void) +{ + int i; + + printk(KERN_INFO "Stopping all CPUs..."); + for (i = 0; i < num_online_cpus(); i++) { + if (i == current_thread->cpu) + continue; + os_write_file(cpu_data[i].ipi_pipe[1], "S", 1); + } + printk(KERN_CONT "done\n"); +} + +static cpumask_t smp_commenced_mask = CPU_MASK_NONE; +static cpumask_t cpu_callin_map = CPU_MASK_NONE; + +static int idle_proc(void *cpup) +{ + int cpu = (int) cpup, err; + + err = os_pipe(cpu_data[cpu].ipi_pipe, 1, 1); + if (err < 0) + panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err); + + os_set_fd_async(cpu_data[cpu].ipi_pipe[0]); + + wmb(); + if (cpu_test_and_set(cpu, cpu_callin_map)) { + printk(KERN_ERR "huh, CPU#%d already present??\n", cpu); + BUG(); + } + + while (!cpu_isset(cpu, smp_commenced_mask)) + cpu_relax(); + + notify_cpu_starting(cpu); + cpu_set(cpu, cpu_online_map); + default_idle(); + return 0; +} + +static struct task_struct *idle_thread(int cpu) +{ + struct task_struct *new_task; + + current->thread.request.u.thread.proc = idle_proc; + current->thread.request.u.thread.arg = (void *) cpu; + new_task = fork_idle(cpu); + if (IS_ERR(new_task)) + panic("copy_process failed in idle_thread, error = %ld", + PTR_ERR(new_task)); + + cpu_tasks[cpu] = ((struct cpu_task) + { .pid = new_task->thread.mode.tt.extern_pid, + .task = new_task } ); + idle_threads[cpu] = new_task; + panic("skas mode doesn't support SMP"); + return new_task; +} + +void smp_prepare_cpus(unsigned int maxcpus) +{ + struct task_struct *idle; + unsigned long waittime; + int err, cpu, me = smp_processor_id(); + int i; + + for (i = 0; i < ncpus; ++i) + cpu_set(i, cpu_possible_map); + + cpu_clear(me, 
cpu_online_map); + cpu_set(me, cpu_online_map); + cpu_set(me, cpu_callin_map); + + err = os_pipe(cpu_data[me].ipi_pipe, 1, 1); + if (err < 0) + panic("CPU#0 failed to create IPI pipe, errno = %d", -err); + + os_set_fd_async(cpu_data[me].ipi_pipe[0]); + + for (cpu = 1; cpu < ncpus; cpu++) { + printk(KERN_INFO "Booting processor %d...\n", cpu); + + idle = idle_thread(cpu); + + init_idle(idle, cpu); + + waittime = 200000000; + while (waittime-- && !cpu_isset(cpu, cpu_callin_map)) + cpu_relax(); + + printk(KERN_INFO "%s\n", + cpu_isset(cpu, cpu_calling_map) ? "done" : "failed"); + } +} + +void smp_prepare_boot_cpu(void) +{ + cpu_set(smp_processor_id(), cpu_online_map); +} + +int __cpu_up(unsigned int cpu) +{ + cpu_set(cpu, smp_commenced_mask); + while (!cpu_isset(cpu, cpu_online_map)) + mb(); + return 0; +} + +int setup_profiling_timer(unsigned int multiplier) +{ + printk(KERN_INFO "setup_profiling_timer\n"); + return 0; +} + +void smp_call_function_slave(int cpu); + +void IPI_handler(int cpu) +{ + unsigned char c; + int fd; + + fd = cpu_data[cpu].ipi_pipe[0]; + while (os_read_file(fd, &c, 1) == 1) { + switch (c) { + case 'C': + smp_call_function_slave(cpu); + break; + + case 'R': + set_tsk_need_resched(current); + break; + + case 'S': + printk(KERN_INFO "CPU#%d stopping\n", cpu); + while (1) + pause(); + break; + + default: + printk(KERN_ERR "CPU#%d received unknown IPI [%c]!\n", + cpu, c); + break; + } + } +} + +int hard_smp_processor_id(void) +{ + return pid_to_processor_id(os_getpid()); +} + +static DEFINE_SPINLOCK(call_lock); +static atomic_t scf_started; +static atomic_t scf_finished; +static void (*func)(void *info); +static void *info; + +void smp_call_function_slave(int cpu) +{ + atomic_inc(&scf_started); + (*func)(info); + atomic_inc(&scf_finished); +} + +int smp_call_function(void (*_func)(void *info), void *_info, int wait) +{ + int cpus = num_online_cpus() - 1; + int i; + + if (!cpus) + return 0; + + /* Can deadlock when called with interrupts disabled */ 
+ WARN_ON(irqs_disabled()); + + spin_lock_bh(&call_lock); + atomic_set(&scf_started, 0); + atomic_set(&scf_finished, 0); + func = _func; + info = _info; + + for_each_online_cpu(i) + os_write_file(cpu_data[i].ipi_pipe[1], "C", 1); + + while (atomic_read(&scf_started) != cpus) + barrier(); + + if (wait) + while (atomic_read(&scf_finished) != cpus) + barrier(); + + spin_unlock_bh(&call_lock); + return 0; +} + +#endif diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c new file mode 100644 index 0000000..c4df705 --- /dev/null +++ b/arch/um/kernel/syscall.c @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "linux/file.h" +#include "linux/fs.h" +#include "linux/mm.h" +#include "linux/sched.h" +#include "linux/utsname.h" +#include "asm/current.h" +#include "asm/mman.h" +#include "asm/uaccess.h" +#include "asm/unistd.h" +#include "internal.h" + +long sys_fork(void) +{ + long ret; + + current->thread.forking = 1; + ret = do_fork(SIGCHLD, UPT_SP(&current->thread.regs.regs), + &current->thread.regs, 0, NULL, NULL); + current->thread.forking = 0; + return ret; +} + +long sys_vfork(void) +{ + long ret; + + current->thread.forking = 1; + ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, + UPT_SP(&current->thread.regs.regs), + &current->thread.regs, 0, NULL, NULL); + current->thread.forking = 0; + return ret; +} + +/* common code for old and new mmaps */ +long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) +{ + long error = -EBADF; + struct file * file = NULL; + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + goto out; + } + + down_write(&current->mm->mmap_sem); + error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(&current->mm->mmap_sem); + + if (file) + fput(file); + out: + return error; +} + +long old_mmap(unsigned long addr, unsigned long len, 
+ unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long offset) +{ + long err = -EINVAL; + if (offset & ~PAGE_MASK) + goto out; + + err = sys_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); + out: + return err; +} + +long sys_uname(struct old_utsname __user * name) +{ + long err; + if (!name) + return -EFAULT; + down_read(&uts_sem); + err = copy_to_user(name, utsname(), sizeof (*name)); + up_read(&uts_sem); + return err?-EFAULT:0; +} + +long sys_olduname(struct oldold_utsname __user * name) +{ + long error; + + if (!name) + return -EFAULT; + if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) + return -EFAULT; + + down_read(&uts_sem); + + error = __copy_to_user(&name->sysname, &utsname()->sysname, + __OLD_UTS_LEN); + error |= __put_user(0, name->sysname + __OLD_UTS_LEN); + error |= __copy_to_user(&name->nodename, &utsname()->nodename, + __OLD_UTS_LEN); + error |= __put_user(0, name->nodename + __OLD_UTS_LEN); + error |= __copy_to_user(&name->release, &utsname()->release, + __OLD_UTS_LEN); + error |= __put_user(0, name->release + __OLD_UTS_LEN); + error |= __copy_to_user(&name->version, &utsname()->version, + __OLD_UTS_LEN); + error |= __put_user(0, name->version + __OLD_UTS_LEN); + error |= __copy_to_user(&name->machine, &utsname()->machine, + __OLD_UTS_LEN); + error |= __put_user(0, name->machine + __OLD_UTS_LEN); + + up_read(&uts_sem); + + error = error ? 
-EFAULT : 0; + + return error; +} + +int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +{ + mm_segment_t fs; + int ret; + + fs = get_fs(); + set_fs(KERNEL_DS); + ret = um_execve(filename, argv, envp); + set_fs(fs); + + return ret; +} diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c new file mode 100644 index 0000000..56d43d0 --- /dev/null +++ b/arch/um/kernel/sysrq.c @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/kallsyms.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include "sysrq.h" + +/* Catch non-i386 SUBARCH's. */ +#if !defined(CONFIG_UML_X86) || defined(CONFIG_64BIT) +void show_trace(struct task_struct *task, unsigned long * stack) +{ + unsigned long addr; + + if (!stack) { + stack = (unsigned long*) &stack; + WARN_ON(1); + } + + printk(KERN_INFO "Call Trace: \n"); + while (((long) stack & (THREAD_SIZE-1)) != 0) { + addr = *stack; + if (__kernel_text_address(addr)) { + printk(KERN_INFO "%08lx: [<%08lx>]", + (unsigned long) stack, addr); + print_symbol(KERN_CONT " %s", addr); + printk(KERN_CONT "\n"); + } + stack++; + } + printk(KERN_INFO "\n"); +} +#endif + +/* + * stack dumps generator - this is used by arch-independent code. + * And this is identical to i386 currently. 
+ */ +void dump_stack(void) +{ + unsigned long stack; + + show_trace(current, &stack); +} +EXPORT_SYMBOL(dump_stack); + +/*Stolen from arch/i386/kernel/traps.c */ +static const int kstack_depth_to_print = 24; + +/* This recently started being used in arch-independent code too, as in + * kernel/sched.c.*/ +void show_stack(struct task_struct *task, unsigned long *esp) +{ + unsigned long *stack; + int i; + + if (esp == NULL) { + if (task != current && task != NULL) { + esp = (unsigned long *) KSTK_ESP(task); + } else { + esp = (unsigned long *) &esp; + } + } + + stack = esp; + for (i = 0; i < kstack_depth_to_print; i++) { + if (kstack_end(stack)) + break; + if (i && ((i % 8) == 0)) + printk("\n" KERN_INFO " "); + printk("%08lx ", *stack++); + } + + show_trace(task, esp); +} diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c new file mode 100644 index 0000000..47f04f4 --- /dev/null +++ b/arch/um/kernel/time.c @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/clockchips.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/jiffies.h> +#include <linux/threads.h> +#include <asm/irq.h> +#include <asm/param.h> +#include "kern_util.h" +#include "os.h" + +void timer_handler(int sig, struct uml_pt_regs *regs) +{ + unsigned long flags; + + local_irq_save(flags); + do_IRQ(TIMER_IRQ, regs); + local_irq_restore(flags); +} + +static void itimer_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + set_interval(); + break; + + case CLOCK_EVT_MODE_SHUTDOWN: + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_ONESHOT: + disable_timer(); + break; + + case CLOCK_EVT_MODE_RESUME: + break; + } +} + +static int itimer_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + return timer_one_shot(delta + 1); +} + +static struct clock_event_device itimer_clockevent = { + .name = 
"itimer", + .rating = 250, + .cpumask = CPU_MASK_ALL, + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, + .set_mode = itimer_set_mode, + .set_next_event = itimer_next_event, + .shift = 32, + .irq = 0, +}; + +static irqreturn_t um_timer(int irq, void *dev) +{ + (*itimer_clockevent.event_handler)(&itimer_clockevent); + + return IRQ_HANDLED; +} + +static cycle_t itimer_read(void) +{ + return os_nsecs() / 1000; +} + +static struct clocksource itimer_clocksource = { + .name = "itimer", + .rating = 300, + .read = itimer_read, + .mask = CLOCKSOURCE_MASK(64), + .mult = 1000, + .shift = 0, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static void __init setup_itimer(void) +{ + int err; + + err = request_irq(TIMER_IRQ, um_timer, IRQF_DISABLED, "timer", NULL); + if (err != 0) + printk(KERN_ERR "register_timer : request_irq failed - " + "errno = %d\n", -err); + + itimer_clockevent.mult = div_sc(HZ, NSEC_PER_SEC, 32); + itimer_clockevent.max_delta_ns = + clockevent_delta2ns(60 * HZ, &itimer_clockevent); + itimer_clockevent.min_delta_ns = + clockevent_delta2ns(1, &itimer_clockevent); + err = clocksource_register(&itimer_clocksource); + if (err) { + printk(KERN_ERR "clocksource_register returned %d\n", err); + return; + } + clockevents_register_device(&itimer_clockevent); +} + +void __init time_init(void) +{ + long long nsecs; + + timer_init(); + + nsecs = os_nsecs(); + set_normalized_timespec(&wall_to_monotonic, -nsecs / NSEC_PER_SEC, + -nsecs % NSEC_PER_SEC); + set_normalized_timespec(&xtime, nsecs / NSEC_PER_SEC, + nsecs % NSEC_PER_SEC); + late_time_init = setup_itimer; +} diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c new file mode 100644 index 0000000..d175d05 --- /dev/null +++ b/arch/um/kernel/tlb.c @@ -0,0 +1,535 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/mm.h> +#include <linux/sched.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include "as-layout.h" 
+#include "mem_user.h" +#include "os.h" +#include "skas.h" +#include "tlb.h" + +struct host_vm_change { + struct host_vm_op { + enum { NONE, MMAP, MUNMAP, MPROTECT } type; + union { + struct { + unsigned long addr; + unsigned long len; + unsigned int prot; + int fd; + __u64 offset; + } mmap; + struct { + unsigned long addr; + unsigned long len; + } munmap; + struct { + unsigned long addr; + unsigned long len; + unsigned int prot; + } mprotect; + } u; + } ops[1]; + int index; + struct mm_id *id; + void *data; + int force; +}; + +#define INIT_HVC(mm, force) \ + ((struct host_vm_change) \ + { .ops = { { .type = NONE } }, \ + .id = &mm->context.id, \ + .data = NULL, \ + .index = 0, \ + .force = force }) + +static int do_ops(struct host_vm_change *hvc, int end, + int finished) +{ + struct host_vm_op *op; + int i, ret = 0; + + for (i = 0; i < end && !ret; i++) { + op = &hvc->ops[i]; + switch (op->type) { + case MMAP: + ret = map(hvc->id, op->u.mmap.addr, op->u.mmap.len, + op->u.mmap.prot, op->u.mmap.fd, + op->u.mmap.offset, finished, &hvc->data); + break; + case MUNMAP: + ret = unmap(hvc->id, op->u.munmap.addr, + op->u.munmap.len, finished, &hvc->data); + break; + case MPROTECT: + ret = protect(hvc->id, op->u.mprotect.addr, + op->u.mprotect.len, op->u.mprotect.prot, + finished, &hvc->data); + break; + default: + printk(KERN_ERR "Unknown op type %d in do_ops\n", + op->type); + break; + } + } + + return ret; +} + +static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, + unsigned int prot, struct host_vm_change *hvc) +{ + __u64 offset; + struct host_vm_op *last; + int fd, ret = 0; + + fd = phys_mapping(phys, &offset); + if (hvc->index != 0) { + last = &hvc->ops[hvc->index - 1]; + if ((last->type == MMAP) && + (last->u.mmap.addr + last->u.mmap.len == virt) && + (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) && + (last->u.mmap.offset + last->u.mmap.len == offset)) { + last->u.mmap.len += len; + return 0; + } + } + + if (hvc->index == 
ARRAY_SIZE(hvc->ops)) { + ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0); + hvc->index = 0; + } + + hvc->ops[hvc->index++] = ((struct host_vm_op) + { .type = MMAP, + .u = { .mmap = { .addr = virt, + .len = len, + .prot = prot, + .fd = fd, + .offset = offset } + } }); + return ret; +} + +static int add_munmap(unsigned long addr, unsigned long len, + struct host_vm_change *hvc) +{ + struct host_vm_op *last; + int ret = 0; + + if (hvc->index != 0) { + last = &hvc->ops[hvc->index - 1]; + if ((last->type == MUNMAP) && + (last->u.munmap.addr + last->u.mmap.len == addr)) { + last->u.munmap.len += len; + return 0; + } + } + + if (hvc->index == ARRAY_SIZE(hvc->ops)) { + ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0); + hvc->index = 0; + } + + hvc->ops[hvc->index++] = ((struct host_vm_op) + { .type = MUNMAP, + .u = { .munmap = { .addr = addr, + .len = len } } }); + return ret; +} + +static int add_mprotect(unsigned long addr, unsigned long len, + unsigned int prot, struct host_vm_change *hvc) +{ + struct host_vm_op *last; + int ret = 0; + + if (hvc->index != 0) { + last = &hvc->ops[hvc->index - 1]; + if ((last->type == MPROTECT) && + (last->u.mprotect.addr + last->u.mprotect.len == addr) && + (last->u.mprotect.prot == prot)) { + last->u.mprotect.len += len; + return 0; + } + } + + if (hvc->index == ARRAY_SIZE(hvc->ops)) { + ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0); + hvc->index = 0; + } + + hvc->ops[hvc->index++] = ((struct host_vm_op) + { .type = MPROTECT, + .u = { .mprotect = { .addr = addr, + .len = len, + .prot = prot } } }); + return ret; +} + +#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1)) + +static inline int update_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, + struct host_vm_change *hvc) +{ + pte_t *pte; + int r, w, x, prot, ret = 0; + + pte = pte_offset_kernel(pmd, addr); + do { + if ((addr >= STUB_START) && (addr < STUB_END)) + continue; + + r = pte_read(*pte); + w = pte_write(*pte); + x = pte_exec(*pte); + if (!pte_young(*pte)) { + r = 0; + 
w = 0; + } else if (!pte_dirty(*pte)) + w = 0; + + prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) | + (x ? UM_PROT_EXEC : 0)); + if (hvc->force || pte_newpage(*pte)) { + if (pte_present(*pte)) + ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK, + PAGE_SIZE, prot, hvc); + else + ret = add_munmap(addr, PAGE_SIZE, hvc); + } else if (pte_newprot(*pte)) + ret = add_mprotect(addr, PAGE_SIZE, prot, hvc); + *pte = pte_mkuptodate(*pte); + } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret)); + return ret; +} + +static inline int update_pmd_range(pud_t *pud, unsigned long addr, + unsigned long end, + struct host_vm_change *hvc) +{ + pmd_t *pmd; + unsigned long next; + int ret = 0; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + if (!pmd_present(*pmd)) { + if (hvc->force || pmd_newpage(*pmd)) { + ret = add_munmap(addr, next - addr, hvc); + pmd_mkuptodate(*pmd); + } + } + else ret = update_pte_range(pmd, addr, next, hvc); + } while (pmd++, addr = next, ((addr < end) && !ret)); + return ret; +} + +static inline int update_pud_range(pgd_t *pgd, unsigned long addr, + unsigned long end, + struct host_vm_change *hvc) +{ + pud_t *pud; + unsigned long next; + int ret = 0; + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + if (!pud_present(*pud)) { + if (hvc->force || pud_newpage(*pud)) { + ret = add_munmap(addr, next - addr, hvc); + pud_mkuptodate(*pud); + } + } + else ret = update_pmd_range(pud, addr, next, hvc); + } while (pud++, addr = next, ((addr < end) && !ret)); + return ret; +} + +void fix_range_common(struct mm_struct *mm, unsigned long start_addr, + unsigned long end_addr, int force) +{ + pgd_t *pgd; + struct host_vm_change hvc; + unsigned long addr = start_addr, next; + int ret = 0; + + hvc = INIT_HVC(mm, force); + pgd = pgd_offset(mm, addr); + do { + next = pgd_addr_end(addr, end_addr); + if (!pgd_present(*pgd)) { + if (force || pgd_newpage(*pgd)) { + ret = add_munmap(addr, next - addr, &hvc); + 
pgd_mkuptodate(*pgd); + } + } + else ret = update_pud_range(pgd, addr, next, &hvc); + } while (pgd++, addr = next, ((addr < end_addr) && !ret)); + + if (!ret) + ret = do_ops(&hvc, hvc.index, 1); + + /* This is not an else because ret is modified above */ + if (ret) { + printk(KERN_ERR "fix_range_common: failed, killing current " + "process\n"); + force_sig(SIGKILL, current); + } +} + +int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) +{ + struct mm_struct *mm; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long addr, last; + int updated = 0, err; + + mm = &init_mm; + for (addr = start; addr < end;) { + pgd = pgd_offset(mm, addr); + if (!pgd_present(*pgd)) { + last = ADD_ROUND(addr, PGDIR_SIZE); + if (last > end) + last = end; + if (pgd_newpage(*pgd)) { + updated = 1; + err = os_unmap_memory((void *) addr, + last - addr); + if (err < 0) + panic("munmap failed, errno = %d\n", + -err); + } + addr = last; + continue; + } + + pud = pud_offset(pgd, addr); + if (!pud_present(*pud)) { + last = ADD_ROUND(addr, PUD_SIZE); + if (last > end) + last = end; + if (pud_newpage(*pud)) { + updated = 1; + err = os_unmap_memory((void *) addr, + last - addr); + if (err < 0) + panic("munmap failed, errno = %d\n", + -err); + } + addr = last; + continue; + } + + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) { + last = ADD_ROUND(addr, PMD_SIZE); + if (last > end) + last = end; + if (pmd_newpage(*pmd)) { + updated = 1; + err = os_unmap_memory((void *) addr, + last - addr); + if (err < 0) + panic("munmap failed, errno = %d\n", + -err); + } + addr = last; + continue; + } + + pte = pte_offset_kernel(pmd, addr); + if (!pte_present(*pte) || pte_newpage(*pte)) { + updated = 1; + err = os_unmap_memory((void *) addr, + PAGE_SIZE); + if (err < 0) + panic("munmap failed, errno = %d\n", + -err); + if (pte_present(*pte)) + map_memory(addr, + pte_val(*pte) & PAGE_MASK, + PAGE_SIZE, 1, 1, 1); + } + else if (pte_newprot(*pte)) { + updated = 1; + 
os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1); + } + addr += PAGE_SIZE; + } + return updated; +} + /* flush_tlb_page: bring the host mapping of the single page containing address in vma->vm_mm in line with its pte. A missing pgd, pud or pmd, or a failing map/unmap/protect call, ends at the kill label, which logs the address and sends SIGKILL to current. On success the pte is marked up to date. */ +void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + struct mm_struct *mm = vma->vm_mm; + void *flush = NULL; + int r, w, x, prot, err = 0; + struct mm_id *mm_id; + + address &= PAGE_MASK; + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + goto kill; + + pud = pud_offset(pgd, address); + if (!pud_present(*pud)) + goto kill; + + pmd = pmd_offset(pud, address); + if (!pmd_present(*pmd)) + goto kill; + + pte = pte_offset_kernel(pmd, address); + + r = pte_read(*pte); + w = pte_write(*pte); + x = pte_exec(*pte); + /* A pte that is not young loses read and write, a pte that is not dirty loses write, so the host protection can be narrower than the pte permissions. */ + if (!pte_young(*pte)) { + r = 0; + w = 0; + } else if (!pte_dirty(*pte)) { + w = 0; + } + + mm_id = &mm->context.id; + prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) | + (x ? UM_PROT_EXEC : 0)); + if (pte_newpage(*pte)) { + if (pte_present(*pte)) { + unsigned long long offset; + int fd; + + fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset); + err = map(mm_id, address, PAGE_SIZE, prot, fd, offset, + 1, &flush); + } + else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush); + } + else if (pte_newprot(*pte)) + err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush); + + if (err) + goto kill; + + *pte = pte_mkuptodate(*pte); + + return; + +kill: + printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address); + force_sig(SIGKILL, current); +} + /* The four *_offset_proc functions below are plain function wrappers around pgd_offset(), pud_offset(), pmd_offset() and pte_offset_kernel(). */ +pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address) +{ + return pgd_offset(mm, address); +} + +pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address) +{ + return pud_offset(pgd, address); +} + +pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address) +{ + return pmd_offset(pud, address); +} + +pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address) +{ + return pte_offset_kernel(pmd, address); +} + /* addr_pte: look up the pte for addr in task->mm. None of the intermediate levels is checked for presence. */ +pte_t *addr_pte(struct task_struct *task, unsigned long addr) +{ + pgd_t *pgd =
pgd_offset(task->mm, addr); + pud_t *pud = pud_offset(pgd, addr); + pmd_t *pmd = pmd_offset(pud, addr); + + return pte_offset_map(pmd, addr); +} + /* flush_tlb_all: flush every vma of the current process by delegating to flush_tlb_mm(current->mm). */ +void flush_tlb_all(void) +{ + flush_tlb_mm(current->mm); +} + /* flush_tlb_kernel_range: exported name for flush_tlb_kernel_range_common(); the result is discarded. */ +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + flush_tlb_kernel_range_common(start, end); +} + /* flush_tlb_kernel_vm: flush the whole [start_vm, end_vm) range. */ +void flush_tlb_kernel_vm(void) +{ + flush_tlb_kernel_range_common(start_vm, end_vm); +} + /* __flush_tlb_one: flush the PAGE_SIZE bytes starting at addr. */ +void __flush_tlb_one(unsigned long addr) +{ + flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE); +} + /* fix_range: file-local alias that forwards unchanged to fix_range_common(). */ +static void fix_range(struct mm_struct *mm, unsigned long start_addr, + unsigned long end_addr, int force) +{ + fix_range_common(mm, start_addr, end_addr, force); +} + /* flush_tlb_range: a vma without an mm is treated as a kernel range; otherwise the range is fixed up in vma->vm_mm without forcing. */ +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + if (vma->vm_mm == NULL) + flush_tlb_kernel_range_common(start, end); + else fix_range(vma->vm_mm, start, end, 0); +} + /* flush_tlb_mm_range: non-forced fix-up of [start, end) in mm, skipped when mm has no users left. */ +void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + /* + * Don't bother flushing if this address space is about to be + * destroyed.
+ */ + if (atomic_read(&mm->mm_users) == 0) + return; + + fix_range(mm, start, end, 0); +} + /* flush_tlb_mm: non-forced fix-up of every vma on the mm->mmap list, one vma range at a time. */ +void flush_tlb_mm(struct mm_struct *mm) +{ + struct vm_area_struct *vma = mm->mmap; + + while (vma != NULL) { + fix_range(mm, vma->vm_start, vma->vm_end, 0); + vma = vma->vm_next; + } +} + /* force_flush_all: same walk over the vmas of current->mm, but with force set to 1. */ +void force_flush_all(void) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma = mm->mmap; + + while (vma != NULL) { + fix_range(mm, vma->vm_start, vma->vm_end, 1); + vma = vma->vm_next; + } +} diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c new file mode 100644 index 0000000..44e4904 --- /dev/null +++ b/arch/um/kernel/trap.c @@ -0,0 +1,259 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/hardirq.h> +#include <asm/current.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include "arch.h" +#include "as-layout.h" +#include "kern_util.h" +#include "os.h" +#include "skas.h" +#include "sysdep/sigcontext.h" + +/* + * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by + * segv(). *code_out is set to SEGV_MAPERR on entry and changed to + * SEGV_ACCERR once the good_area label is reached. + */ +int handle_page_fault(unsigned long address, unsigned long ip, + int is_write, int is_user, int *code_out) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int err = -EFAULT; + + *code_out = SEGV_MAPERR; + + /* + * If the fault was during atomic operation, don't take the fault, just + * fail.
+ */ + if (in_atomic()) + goto out_nosemaphore; + + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); + if (!vma) + goto out; + else if (vma->vm_start <= address) + goto good_area; /* From here on the address lies below the vma that was found: it is accepted only if that vma has VM_GROWSDOWN, the access passes ARCH_IS_STACKGROW() for user faults, and expand_stack() succeeds. */ + else if (!(vma->vm_flags & VM_GROWSDOWN)) + goto out; + else if (is_user && !ARCH_IS_STACKGROW(address)) + goto out; + else if (expand_stack(vma, address)) + goto out; + +good_area: + *code_out = SEGV_ACCERR; + if (is_write && !(vma->vm_flags & VM_WRITE)) + goto out; + + /* Don't require VM_READ|VM_EXEC for write faults! */ + if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC))) + goto out; + /* Repeat handle_mm_fault() until the pte for the address is present. The survive label inside the loop is the re-entry point used by the out_of_memory path. */ + do { + int fault; +survive: + fault = handle_mm_fault(mm, vma, address, is_write); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) { + err = -ENOMEM; + goto out_of_memory; + } else if (fault & VM_FAULT_SIGBUS) { + err = -EACCES; + goto out; + } + BUG(); + } + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; + + pgd = pgd_offset(mm, address); + pud = pud_offset(pgd, address); + pmd = pmd_offset(pud, address); + pte = pte_offset_kernel(pmd, address); + } while (!pte_present(*pte)); + err = 0; + /* + * The below warning was added in place of + * pte_mkyoung(); if (is_write) pte_mkdirty(); + * If it's triggered, we'd see normally a hang here (a clean pte is + * marked read-only to emulate the dirty bit). + * However, the generic code can mark a PTE writable but clean on a + * concurrent read fault, triggering this harmlessly. So comment it out. + */ +#if 0 + WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte))); +#endif + flush_tlb_page(vma, address); +out: + up_read(&mm->mmap_sem); +out_nosemaphore: + return err; + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully.
+ */ +out_of_memory: + if (is_global_init(current)) { + up_read(&mm->mmap_sem); + yield(); + down_read(&mm->mmap_sem); + goto survive; + } + goto out; +} + +static void bad_segv(struct faultinfo fi, unsigned long ip) +{ + struct siginfo si; + + si.si_signo = SIGSEGV; + si.si_code = SEGV_ACCERR; + si.si_addr = (void __user *) FAULT_ADDRESS(fi); + current->thread.arch.faultinfo = fi; + force_sig_info(SIGSEGV, &si, current); +} + +void fatal_sigsegv(void) +{ + force_sigsegv(SIGSEGV, current); + do_signal(); + /* + * This is to tell gcc that we're not returning - do_signal + * can, in general, return, but in this case, it's not, since + * we just got a fatal SIGSEGV queued. + */ + os_dump_core(); +} + +void segv_handler(int sig, struct uml_pt_regs *regs) +{ + struct faultinfo * fi = UPT_FAULTINFO(regs); + + if (UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)) { + bad_segv(*fi, UPT_IP(regs)); + return; + } + segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs); +} + +/* + * We give a *copy* of the faultinfo in the regs to segv. + * This must be done, since nesting SEGVs could overwrite + * the info in the regs. A pointer to the info then would + * give us bad data! + */ +unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, + struct uml_pt_regs *regs) +{ + struct siginfo si; + jmp_buf *catcher; + int err; + int is_write = FAULT_WRITE(fi); + unsigned long address = FAULT_ADDRESS(fi); + + if (!is_user && (address >= start_vm) && (address < end_vm)) { + flush_tlb_kernel_vm(); + return 0; + } + else if (current->mm == NULL) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Segfault with no mm"); + } + + if (SEGV_IS_FIXABLE(&fi) || SEGV_MAYBE_FIXABLE(&fi)) + err = handle_page_fault(address, ip, is_write, is_user, + &si.si_code); + else { + err = -EFAULT; + /* + * A thread accessed NULL, we get a fault, but CR2 is invalid. + * This code is used in __do_copy_from_user() of TT mode. 
+ * XXX tt mode is gone, so maybe this isn't needed any more + */ + address = 0; + } + + catcher = current->thread.fault_catcher; + if (!err) + return 0; + else if (catcher != NULL) { + current->thread.fault_addr = (void *) address; + UML_LONGJMP(catcher, 1); + } + else if (current->thread.fault_addr != NULL) + panic("fault_addr set but no fault catcher"); + else if (!is_user && arch_fixup(ip, regs)) + return 0; + + if (!is_user) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Kernel mode fault at addr 0x%lx, ip 0x%lx", + address, ip); + } + + if (err == -EACCES) { + si.si_signo = SIGBUS; + si.si_errno = 0; + si.si_code = BUS_ADRERR; + si.si_addr = (void __user *)address; + current->thread.arch.faultinfo = fi; + force_sig_info(SIGBUS, &si, current); + } else if (err == -ENOMEM) { + printk(KERN_INFO "VM: killing process %s\n", current->comm); + do_exit(SIGKILL); + } else { + BUG_ON(err != -EFAULT); + si.si_signo = SIGSEGV; + si.si_addr = (void __user *) address; + current->thread.arch.faultinfo = fi; + force_sig_info(SIGSEGV, &si, current); + } + return 0; +} + +void relay_signal(int sig, struct uml_pt_regs *regs) +{ + if (!UPT_IS_USER(regs)) { + if (sig == SIGBUS) + printk(KERN_ERR "Bus error - the host /dev/shm or /tmp " + "mount likely just ran out of space\n"); + panic("Kernel mode signal %d", sig); + } + + arch_examine_signal(sig, regs); + + current->thread.arch.faultinfo = *UPT_FAULTINFO(regs); + force_sig(sig, current); +} + +void bus_handler(int sig, struct uml_pt_regs *regs) +{ + if (current->thread.fault_catcher != NULL) + UML_LONGJMP(current->thread.fault_catcher, 1); + else relay_signal(sig, regs); +} + +void winch(int sig, struct uml_pt_regs *regs) +{ + do_IRQ(WINCH_IRQ, regs); +} + +void trap_init(void) +{ +} diff --git a/arch/um/kernel/uaccess.c b/arch/um/kernel/uaccess.c new file mode 100644 index 0000000..dd33f04 --- /dev/null +++ b/arch/um/kernel/uaccess.c @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2001 Chris Emerson 
(cemerson@chiark.greenend.org.uk) + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +/* + * These are here rather than tt/uaccess.c because skas mode needs them in + * order to do SIGBUS recovery when a tmpfs mount runs out of room. + */ + +#include <linux/string.h> +#include "os.h" + /* __do_copy: memcpy() with the signature that __do_user_copy() is given as its copy callback. */ +static void __do_copy(void *to, const void *from, int n) +{ + memcpy(to, from, n); +} + + /* __do_copy_to_user: copy n bytes through __do_user_copy(). Returns 0 when no fault was reported, otherwise n minus the distance from to up to the faulting address. */ +int __do_copy_to_user(void *to, const void *from, int n, + void **fault_addr, jmp_buf **fault_catcher) +{ + unsigned long fault; + int faulted; + + fault = __do_user_copy(to, from, n, fault_addr, fault_catcher, + __do_copy, &faulted); + if (!faulted) + return 0; + else + return n - (fault - (unsigned long) to); +} diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c new file mode 100644 index 0000000..8d84250 --- /dev/null +++ b/arch/um/kernel/um_arch.c @@ -0,0 +1,402 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/string.h> +#include <linux/utsname.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/setup.h> +#include "as-layout.h" +#include "arch.h" +#include "init.h" +#include "kern.h" +#include "kern_util.h" +#include "mem_user.h" +#include "os.h" + +#define DEFAULT_COMMAND_LINE "root=98:0" + +/* Changed in add_arg and setup_arch, which run before SMP is started */ +static char __initdata command_line[COMMAND_LINE_SIZE] = { 0 }; + /* add_arg: append arg to command_line, separated by one space when command_line is not empty; prints a message and exits with status 1 when the result would not fit. */ +static void __init add_arg(char *arg) +{ + /* Bytes needed: current text, one separator if there is current text, the new argument, and the terminating NUL. The separator was previously left out of the check, which let strcat() write one byte past the buffer. */ + if (strlen(command_line) + (command_line[0] != '\0') + strlen(arg) + 1 > COMMAND_LINE_SIZE) { + printf("add_arg: Too many command line arguments!\n"); + exit(1); + } + if (strlen(command_line) > 0) + strcat(command_line, " "); + strcat(command_line, arg); +} + +/* + * These fields are initialized at boot time and not changed.
+ * XXX This structure is used only in the non-SMP case. Maybe this + * should be moved to smp.c. + */ +struct cpuinfo_um boot_cpu_data = { + .loops_per_jiffy = 0, + .ipi_pipe = { -1, -1 } +}; + /* thread_saved_pc: returns os_process_pc() of userspace_pid[0]; the task argument is not used (see the FIXME). */ +unsigned long thread_saved_pc(struct task_struct *task) +{ + /* FIXME: Need to look up userspace_pid by cpu */ + return os_process_pc(userspace_pid[0]); +} + +/* Changed in setup_arch, which is called in early boot */ +static char host_info[(__NEW_UTS_LEN + 1) * 5]; + /* show_cpuinfo: seq_file show callback that prints one processor entry (index, fixed vendor/model/mode strings, host_info and a bogomips figure derived from loops_per_jiffy). With CONFIG_SMP the index is the offset of v within cpu_data and offline CPUs print nothing. */ +static int show_cpuinfo(struct seq_file *m, void *v) +{ + int index = 0; + +#ifdef CONFIG_SMP + index = (struct cpuinfo_um *) v - cpu_data; + if (!cpu_online(index)) + return 0; +#endif + + seq_printf(m, "processor\t: %d\n", index); + seq_printf(m, "vendor_id\t: User Mode Linux\n"); + seq_printf(m, "model name\t: UML\n"); + seq_printf(m, "mode\t\t: skas\n"); + seq_printf(m, "host\t\t: %s\n", host_info); + seq_printf(m, "bogomips\t: %lu.%02lu\n\n", + loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100); + + return 0; +} + /* c_start: seq_file start callback; yields the cpu_data slot for *pos while *pos is below NR_CPUS, NULL afterwards. */ +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < NR_CPUS ?
cpu_data + *pos : NULL; +} + /* c_next: advance *pos and reuse c_start() for the bounds check. */ +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + /* c_stop: nothing to release. */ +static void c_stop(struct seq_file *m, void *v) +{ +} + /* seq_file operations built from the four callbacks above. */ +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; + +/* Set in linux_main */ +unsigned long uml_physmem; +unsigned long uml_reserved; /* Also modified in mem_init */ +unsigned long start_vm; +unsigned long end_vm; + +/* Set in uml_ncpus_setup */ +int ncpus = 1; + +/* Set in early boot */ +static int have_root __initdata = 0; + +/* Set in uml_mem_setup and modified in linux_main */ +long long physmem_size = 32 * 1024 * 1024; + /* printf format used by Usage(); the single %s receives the kernel release string. */ +static const char *usage_string = +"User Mode Linux v%s\n" +" available at http://user-mode-linux.sourceforge.net/\n\n"; + /* uml_version_setup: handler for --version; prints the release string and exits with status 0, so the return statement is never reached. */ +static int __init uml_version_setup(char *line, int *add) +{ + printf("%s\n", init_utsname()->release); + exit(0); + + return 0; +} + +__uml_setup("--version", uml_version_setup, +"--version\n" +" Prints the version number of the kernel.\n\n" +); + /* uml_root_setup: handler for root=; only records that a root argument was seen and leaves *add untouched. */ +static int __init uml_root_setup(char *line, int *add) +{ + have_root = 1; + return 0; +} + +__uml_setup("root=", uml_root_setup, +"root=<file containing the root fs>\n" +" This is actually used by the generic kernel in exactly the same\n" +" way as in any other kernel.
If you configure a number of block\n" +" devices and want to boot off something other than ubd0, you \n" +" would use something like:\n" +" root=/dev/ubd5\n\n" +); + /* no_skas_debug_setup: handler for the debug switch; only prints a hint and returns 0. */ +static int __init no_skas_debug_setup(char *line, int *add) +{ + printf("'debug' is not necessary to gdb UML in skas mode - run \n"); + printf("'gdb linux'\n"); + + return 0; +} + +__uml_setup("debug", no_skas_debug_setup, +"debug\n" +" this flag is not needed to run gdb on UML in skas mode\n\n" +); + +#ifdef CONFIG_SMP /* uml_ncpus_setup: handler for ncpus=; parses a decimal count into ncpus. Returns -1 after printing a message when no number could be read, 0 otherwise. */ +static int __init uml_ncpus_setup(char *line, int *add) +{ + /* sscanf() returns EOF (a negative value) for an empty string, so the result has to be compared with the number of expected conversions rather than tested for zero. */ + if (sscanf(line, "%d", &ncpus) != 1) { + printf("Couldn't parse [%s]\n", line); + return -1; + } + + return 0; +} + +__uml_setup("ncpus=", uml_ncpus_setup, +"ncpus=<# of desired CPUs>\n" +" This tells an SMP kernel how many virtual processors to start.\n\n" +); +#endif + /* Usage: handler for --help; prints usage_string with the release, then every help string between __uml_help_start and __uml_help_end, and exits with status 0. */ +static int __init Usage(char *line, int *add) +{ + const char **p; + + printf(usage_string, init_utsname()->release); + p = &__uml_help_start; + while (p < &__uml_help_end) { + printf("%s", *p); + p++; + } + exit(0); + return 0; +} + +__uml_setup("--help", Usage, +"--help\n" +" Prints this message.\n\n" +); + /* uml_checksetup: run line past every registered uml_param. A handler is called with the text following its prefix when the prefix matches; the scan stops at the first handler that returns non-zero. */ +static void __init uml_checksetup(char *line, int *add) +{ + struct uml_param *p; + + p = &__uml_setup_start; + while (p < &__uml_setup_end) { + size_t n; + + n = strlen(p->str); + if (!strncmp(line, p->str, n) && p->setup_func(line + n, add)) + return; + p++; + } +} + /* uml_postsetup: call every initcall between __uml_postsetup_start and __uml_postsetup_end in order. */ +static void __init uml_postsetup(void) +{ + initcall_t *p; + + p = &__uml_postsetup_start; + while (p < &__uml_postsetup_end) { + (*p)(); + p++; + } + return; +} + /* panic_exit: panic notifier; shows the registers of current, sets uml_exitcode to 1 and dumps core. */ +static int panic_exit(struct notifier_block *self, unsigned long unused1, + void *unused2) +{ + bust_spinlocks(1); + show_regs(&(current->thread.regs)); + bust_spinlocks(0); + uml_exitcode = 1; + os_dump_core(); + return 0; +} + +static struct notifier_block panic_exit_notifier = { + .notifier_call = panic_exit, + .next = NULL, + .priority = 0 +}; + +/* Set during early boot */ +unsigned long task_size;
+EXPORT_SYMBOL(task_size); + +unsigned long host_task_size; + +unsigned long brk_start; +unsigned long end_iomem; +EXPORT_SYMBOL(end_iomem); + /* Amount subtracted from the address space when max_physmem is computed in linux_main(). */ +#define MIN_VMALLOC (32 * 1024 * 1024) + +extern char __binary_start; + /* linux_main: boot-time entry. Runs every argument through uml_checksetup() and appends those still marked add to the kernel command line, adds DEFAULT_COMMAND_LINE when no root= was seen, then sizes the memory layout and finally returns start_uml(). */ +int __init linux_main(int argc, char **argv) +{ + unsigned long avail, diff; + unsigned long virtmem_size, max_physmem; + unsigned long stack; + unsigned int i; + int add; + char * mode; + + for (i = 1; i < argc; i++) { + /* A first argument that begins with a space is skipped. */ + if ((i == 1) && (argv[i][0] == ' ')) + continue; + add = 1; + uml_checksetup(argv[i], &add); + if (add) + add_arg(argv[i]); + } + if (have_root == 0) + add_arg(DEFAULT_COMMAND_LINE); + + host_task_size = os_get_top_address(); + /* + * TASK_SIZE needs to be PGDIR_SIZE aligned or else exit_mmap craps + * out + */ + task_size = host_task_size & PGDIR_MASK; + + /* OS sanity checks that need to happen before the kernel runs */ + os_early_checks(); + + can_do_skas(); + /* The mode string is only used for the message printed below. */ + if (proc_mm && ptrace_faultinfo) + mode = "SKAS3"; + else + mode = "SKAS0"; + + printf("UML running in %s mode\n", mode); + + brk_start = (unsigned long) sbrk(0); + + /* + * Increase physical memory size for exec-shield users + * so they actually get what they asked for.
This should + * add zero for non-exec shield users + */ + + diff = UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end); + /* NOTE(review): diff is declared unsigned long in this function but printed with %ld below. */ + if (diff > 1024 * 1024) { + printf("Adding %ld bytes to physical memory to account for " + "exec-shield gap\n", diff); + physmem_size += UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end); + } + + uml_physmem = (unsigned long) &__binary_start & PAGE_MASK; + + /* Reserve up to 4M after the current brk */ + uml_reserved = ROUND_4M(brk_start) + (1 << 22); + + setup_machinename(init_utsname()->machine); + + highmem = 0; + /* Round iomem_size up to a whole number of pages. */ + iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; + max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC; + + /* + * Zones have to begin on a 1 << MAX_ORDER page boundary, + * so this makes sure that's true for highmem + */ + max_physmem &= ~((1 << (PAGE_SHIFT + MAX_ORDER)) - 1); + /* Whatever does not fit below max_physmem is taken out of physmem_size and counted as highmem; without CONFIG_HIGHMEM that part is dropped. */ + if (physmem_size + iomem_size > max_physmem) { + highmem = physmem_size + iomem_size - max_physmem; + physmem_size -= highmem; +#ifndef CONFIG_HIGHMEM + highmem = 0; + printf("CONFIG_HIGHMEM not enabled - physical memory shrunk " + "to %Lu bytes\n", physmem_size); +#endif + } + + high_physmem = uml_physmem + physmem_size; + end_iomem = high_physmem + iomem_size; + high_memory = (void *) end_iomem; + + start_vm = VMALLOC_START; + + setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem); + if (init_maps(physmem_size, iomem_size, highmem)) { + printf("Failed to allocate mem_map for %Lu bytes of physical " + "memory and %Lu bytes of highmem\n", physmem_size, + highmem); + exit(1); + } + /* Kernel virtual memory starts at start_vm and is capped by the space below the address of argv rounded down to 1M. */ + virtmem_size = physmem_size; + stack = (unsigned long) argv; + stack &= ~(1024 * 1024 - 1); + avail = stack - start_vm; + if (physmem_size > avail) + virtmem_size = avail; + end_vm = start_vm + virtmem_size; + + if (virtmem_size < physmem_size) + printf("Kernel virtual memory size shrunk to %lu bytes\n", + virtmem_size); + + atomic_notifier_chain_register(&panic_notifier_list, + &panic_exit_notifier); + + uml_postsetup(); + + stack_protections((unsigned long)
&init_thread_info); + os_flush_stdout(); + + return start_uml(); +} + /* setup_arch: initialise paging, copy command_line into boot_command_line (bounded by COMMAND_LINE_SIZE), hand command_line back through cmdline_p and fill host_info. */ +void __init setup_arch(char **cmdline_p) +{ + paging_init(); + strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; + setup_hostinfo(host_info, sizeof host_info); +} + /* check_bugs: run the arch checks, then the OS checks. */ +void __init check_bugs(void) +{ + arch_check_bugs(); + os_check_bugs(); +} + /* The alternatives hooks below are empty in this file. */ +void apply_alternatives(struct alt_instr *start, struct alt_instr *end) +{ +} + +#ifdef CONFIG_SMP +void alternatives_smp_module_add(struct module *mod, char *name, + void *locks, void *locks_end, + void *text, void *text_end) +{ +} + +void alternatives_smp_module_del(struct module *mod) +{ +} +#endif diff --git a/arch/um/kernel/umid.c b/arch/um/kernel/umid.c new file mode 100644 index 0000000..81e07e2 --- /dev/null +++ b/arch/um/kernel/umid.c @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <asm/errno.h> +#include "init.h" +#include "kern.h" +#include "os.h" + +/* Changed by set_umid_arg */ +static int umid_inited = 0; + /* set_umid_arg: handler for umid=. The first call clears *add and passes name to set_umid(); umid_inited is set only when that succeeds, and -EEXIST gets its own message. A call made once umid_inited is set only prints a message and leaves *add untouched. Always returns 0. */ +static int __init set_umid_arg(char *name, int *add) +{ + int err; + + if (umid_inited) { + printf("umid already set\n"); + return 0; + } + + *add = 0; + err = set_umid(name); + if (err == -EEXIST) + printf("umid '%s' already in use\n", name); + else if (!err) + umid_inited = 1; + + return 0; +} + +__uml_setup("umid=", set_umid_arg, +"umid=<name>\n" +" This is used to assign a unique identity to this UML machine and\n" +" is used for naming the pid file and management console socket.\n\n" +); + diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S new file mode 100644 index 0000000..11b8352 --- /dev/null +++ b/arch/um/kernel/uml.lds.S @@ -0,0 +1,103 @@ +#include <asm-generic/vmlinux.lds.h> +#include <asm/page.h> + +OUTPUT_FORMAT(ELF_FORMAT) +OUTPUT_ARCH(ELF_ARCH) +ENTRY(_start) +jiffies = jiffies_64; + +SECTIONS +{ + /* This must contain the right address - not quite the default ELF one.*/ + PROVIDE
(__executable_start = START); + /* Static binaries stick stuff here, like the sigreturn trampoline, + * invisibly to objdump. So, just make __binary_start equal to the very + * beginning of the executable, and if there are unmapped pages after this, + * they are forever unusable. + */ + __binary_start = START; + + . = START + SIZEOF_HEADERS; + + _text = .; + _stext = .; + __init_begin = .; + /* Init text, bracketed by _sinittext and _einittext. */ + .init.text : { + _sinittext = .; + INIT_TEXT + _einittext = .; + } + . = ALIGN(PAGE_SIZE); + + .text : + { + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + *(.fixup) + /* .gnu.warning sections are handled specially by elf32.em. */ + *(.gnu.warning) + *(.gnu.linkonce.t*) + } + + . = ALIGN(PAGE_SIZE); + /* Page-aligned section that collects the .__syscall_stub input sections between __syscall_stub_start and __syscall_stub_end. */ + .syscall_stub : { + __syscall_stub_start = .; + *(.__syscall_stub*) + __syscall_stub_end = .; + } + + #include "asm/common.lds.S" + /* NOTE(review): the next output section is named init.data without a leading dot, unlike .init.text above - confirm that this is intended. */ + init.data : { INIT_DATA } + .data : + { + . = ALIGN(KERNEL_STACK_SIZE); /* init_task */ + *(.data.init_task) + . = ALIGN(KERNEL_STACK_SIZE); + *(.data.init_irqstack) + DATA_DATA + *(.gnu.linkonce.d*) + CONSTRUCTORS + } + .data1 : { *(.data1) } + .ctors : + { + *(.ctors) + } + .dtors : + { + *(.dtors) + } + + .got : { *(.got.plt) *(.got) } + .dynamic : { *(.dynamic) } + .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) } + .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) } + /* We want the small data sections together, so single-instruction offsets + can access them all, and initialized data all before uninitialized, so + we can shorten the on-disk segment size. */ + .sdata : { *(.sdata) } + _edata = .; + PROVIDE (edata = .); + .
= ALIGN(PAGE_SIZE); + .sbss : + { + __bss_start = .; + PROVIDE(_bss_start = .); + *(.sbss) + *(.scommon) + } + .bss : + { + *(.dynbss) + *(.bss) + *(COMMON) + } + _end = .; + PROVIDE (end = .); + + STABS_DEBUG + + DWARF_DEBUG +} diff --git a/arch/um/kernel/vmlinux.lds.S b/arch/um/kernel/vmlinux.lds.S new file mode 100644 index 0000000..f8aeb44 --- /dev/null +++ b/arch/um/kernel/vmlinux.lds.S @@ -0,0 +1,5 @@ +#ifdef CONFIG_LD_SCRIPT_STATIC +#include "uml.lds.S" +#else +#include "dyn.lds.S" +#endif diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile new file mode 100644 index 0000000..d66f038 --- /dev/null +++ b/arch/um/os-Linux/Makefile @@ -0,0 +1,20 @@ +# +# Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) +# Licensed under the GPL +# + +obj-y = aio.o elf_aux.o execvp.o file.o helper.o irq.o main.o mem.o process.o \ + registers.o sigio.o signal.o start_up.o time.o tty.o uaccess.o \ + umid.o tls.o user_syms.o util.o drivers/ sys-$(SUBARCH)/ skas/ + +USER_OBJS := $(user-objs-y) aio.o elf_aux.o execvp.o file.o helper.o irq.o \ + main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \ + tty.o tls.o uaccess.o umid.o util.o + +CFLAGS_user_syms.o += -DSUBARCH_$(SUBARCH) + +HAVE_AIO_ABI := $(shell [ -r /usr/include/linux/aio_abi.h ] && \ + echo -DHAVE_AIO_ABI ) +CFLAGS_aio.o += $(HAVE_AIO_ABI) + +include arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c new file mode 100644 index 0000000..57e3d46 --- /dev/null +++ b/arch/um/os-Linux/aio.c @@ -0,0 +1,393 @@ +/* + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <unistd.h> +#include <sched.h> +#include <signal.h> +#include <errno.h> +#include <sys/time.h> +#include <asm/unistd.h> +#include "aio.h" +#include "init.h" +#include "kern_constants.h" +#include "kern_util.h" +#include "os.h" +#include "user.h" + /* Request record that submit_aio_24() writes to aio_req_fd_w and not_aio_thread() reads from aio_req_fd_r: the operation type, the descriptor, offset, buffer and length to use, and the aio_context that receives the reply. */ +struct aio_thread_req { + enum aio_type type; + int io_fd; +
unsigned long long offset; + char *buf; + int len; + struct aio_context *aio; +}; + +#if defined(HAVE_AIO_ABI) +#include <linux/aio_abi.h> + +/* + * If we have the headers, we are going to build with AIO enabled. + * If we don't have aio in libc, we define the necessary stubs here. + */ + +#if !defined(HAVE_AIO_LIBC) + /* io_setup: issues __NR_io_setup through syscall() and returns its result unchanged. */ +static long io_setup(int n, aio_context_t *ctxp) +{ + return syscall(__NR_io_setup, n, ctxp); +} + /* io_submit: issues __NR_io_submit through syscall() and returns its result unchanged. */ +static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp) +{ + return syscall(__NR_io_submit, ctx, nr, iocbpp); +} + /* io_getevents: issues __NR_io_getevents through syscall() and returns its result unchanged. */ +static long io_getevents(aio_context_t ctx_id, long min_nr, long nr, + struct io_event *events, struct timespec *timeout) +{ + return syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout); +} + +#endif + +/* + * The AIO_MMAP cases force the mmapped page into memory here + * rather than in whatever place first touches the data. I used + * to do this by touching the page, but that's delicate because + * gcc is prone to optimizing that away. So, what's done here + * is we read from the descriptor from which the page was + * mapped. The caller is required to pass an offset which is + * inside the page that was mapped. Thus, when the read + * returns, we know that the page is in the page cache, and + * that it now backs the mmapped area.
+ * + * do_aio builds one iocb for the request and submits it with io_submit(). + * Returns 0 when io_submit() reports a positive count, -EINVAL for an + * unknown type, and -errno otherwise. + */ + +static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf, + int len, unsigned long long offset, struct aio_context *aio) +{ + struct iocb *iocbp = & ((struct iocb) { + .aio_data = (unsigned long) aio, + .aio_fildes = fd, + .aio_buf = (unsigned long) buf, + .aio_nbytes = len, + .aio_offset = offset + }); + char c; + + switch (type) { + case AIO_READ: + iocbp->aio_lio_opcode = IOCB_CMD_PREAD; + break; + case AIO_WRITE: + iocbp->aio_lio_opcode = IOCB_CMD_PWRITE; + break; + case AIO_MMAP: + /* A one-byte read into a local dummy. NOTE(review): c is an automatic variable, so its storage ends when do_aio() returns - confirm the host never writes to it after io_submit() has come back. */ + iocbp->aio_lio_opcode = IOCB_CMD_PREAD; + iocbp->aio_buf = (unsigned long) &c; + iocbp->aio_nbytes = sizeof(c); + break; + default: + printk(UM_KERN_ERR "Bogus op in do_aio - %d\n", type); + return -EINVAL; + } + + return (io_submit(ctx, 1, &iocbp) > 0) ? 0 : -errno; +} + +/* Initialized in an initcall and unchanged thereafter */ +static aio_context_t ctx = 0; + /* aio_thread: helper thread body. Ignores SIGWINCH, then loops forever waiting for one completion event at a time and writing an aio_thread_reply (the aio_context pointer taken from event.data plus event.res) to the reply_fd of that context. EINTR is retried silently; other io_getevents() failures are logged and the loop continues. */ +static int aio_thread(void *arg) +{ + struct aio_thread_reply reply; + struct io_event event; + int err, n, reply_fd; + + signal(SIGWINCH, SIG_IGN); + + while (1) { + n = io_getevents(ctx, 1, 1, &event, NULL); + if (n < 0) { + if (errno == EINTR) + continue; + printk(UM_KERN_ERR "aio_thread - io_getevents failed, " + "errno = %d\n", errno); + } + else { + reply = ((struct aio_thread_reply) + { .data = (void *) (long) event.data, + .err = event.res }); + reply_fd = ((struct aio_context *) reply.data)->reply_fd; + err = write(reply_fd, &reply, sizeof(reply)); + if (err != sizeof(reply)) + printk(UM_KERN_ERR "aio_thread - write failed, " + "fd = %d, err = %d\n", reply_fd, errno); + } + } + return 0; +} + +#endif + /* do_not_aio: synchronous fallback. Seeks req->io_fd to req->offset and performs the read or write described by req (AIO_MMAP reads a single byte into a local dummy). Returns 0 on success, -errno when the seek or the transfer fails, -EINVAL for an unknown type. */ +static int do_not_aio(struct aio_thread_req *req) +{ + char c; + unsigned long long actual; + int n; + + actual = lseek64(req->io_fd, req->offset, SEEK_SET); + if (actual != req->offset) + return -errno; + + switch (req->type) { + case AIO_READ: + n = read(req->io_fd, req->buf, req->len); + break; + case AIO_WRITE: + n = write(req->io_fd, req->buf, req->len); + break; + case AIO_MMAP: + n =
read(req->io_fd, &c, sizeof(c)); + break; + default: + printk(UM_KERN_ERR "do_not_aio - bad request type : %d\n", + req->type); + return -EINVAL; + } + + if (n < 0) + return -errno; + return 0; +} + +/* These are initialized in initcalls and not changed */ +static int aio_req_fd_r = -1; +static int aio_req_fd_w = -1; +static int aio_pid = -1; +static unsigned long aio_stack; + /* not_aio_thread: helper thread body for the fallback path. Ignores SIGWINCH, then loops forever: read one aio_thread_req from aio_req_fd_r, run it with do_not_aio(), and write the result as an aio_thread_reply to the reply_fd of the request. Failed or short reads are logged and skipped. */ +static int not_aio_thread(void *arg) +{ + struct aio_thread_req req; + struct aio_thread_reply reply; + int err; + + signal(SIGWINCH, SIG_IGN); + while (1) { + err = read(aio_req_fd_r, &req, sizeof(req)); + if (err != sizeof(req)) { + if (err < 0) + printk(UM_KERN_ERR "not_aio_thread - " + "read failed, fd = %d, err = %d\n", + aio_req_fd_r, + errno); + else { + printk(UM_KERN_ERR "not_aio_thread - short " + "read, fd = %d, length = %d\n", + aio_req_fd_r, err); + } + continue; + } + err = do_not_aio(&req); + reply = ((struct aio_thread_reply) { .data = req.aio, + .err = err }); + err = write(req.aio->reply_fd, &reply, sizeof(reply)); + if (err != sizeof(reply)) + printk(UM_KERN_ERR "not_aio_thread - write failed, " + "fd = %d, err = %d\n", req.aio->reply_fd, errno); + } + + return 0; +} + /* init_aio_24: create the request descriptor pair with os_pipe(), make the write side non-blocking and start not_aio_thread. On a failure after the pair exists both descriptors are closed and reset to -1. Every path ends at the out label, so the messages there are always printed and the function always returns 0; err is not propagated. */ +static int init_aio_24(void) +{ + int fds[2], err; + + err = os_pipe(fds, 1, 1); + if (err) + goto out; + + aio_req_fd_w = fds[0]; + aio_req_fd_r = fds[1]; + + err = os_set_fd_block(aio_req_fd_w, 0); + if (err) + goto out_close_pipe; + + err = run_helper_thread(not_aio_thread, NULL, + CLONE_FILES | CLONE_VM, &aio_stack); + if (err < 0) + goto out_close_pipe; + + aio_pid = err; + goto out; + +out_close_pipe: + close(fds[0]); + close(fds[1]); + aio_req_fd_w = -1; + aio_req_fd_r = -1; +out: +#ifndef HAVE_AIO_ABI + printk(UM_KERN_INFO "/usr/include/linux/aio_abi.h not present during " + "build\n"); +#endif + printk(UM_KERN_INFO "2.6 host AIO support not used - falling back to " + "I/O thread\n"); + return 0; +} + +#ifdef HAVE_AIO_ABI +#define DEFAULT_24_AIO 0 /* init_aio_26: set up a host AIO context and start aio_thread. Returns 0 on success, -errno when io_setup() fails, or the negative result of run_helper_thread(). */ +static int init_aio_26(void) +{ + int err; + + if
(io_setup(256, &ctx)) { + err = -errno; + printk(UM_KERN_ERR "aio_thread failed to initialize context, " + "err = %d\n", errno); + return err; + } + + err = run_helper_thread(aio_thread, NULL, + CLONE_FILES | CLONE_VM, &aio_stack); + if (err < 0) + return err; + /* A non-negative result of run_helper_thread() is kept as the helper pid. */ + aio_pid = err; + + printk(UM_KERN_INFO "Using 2.6 host AIO\n"); + return 0; +} + /* submit_aio_26: submit through do_aio(). When do_aio() fails, the error is delivered to the submitter as an aio_thread_reply written to aio->reply_fd, and the function itself returns 0 unless that write fails, in which case it returns -errno. */ +static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) +{ + struct aio_thread_reply reply; + int err; + + err = do_aio(ctx, type, io_fd, buf, len, offset, aio); + if (err) { + reply = ((struct aio_thread_reply) { .data = aio, + .err = err }); + err = write(aio->reply_fd, &reply, sizeof(reply)); + if (err != sizeof(reply)) { + err = -errno; + printk(UM_KERN_ERR "submit_aio_26 - write failed, " + "fd = %d, err = %d\n", aio->reply_fd, -err); + } + else err = 0; + } + + return err; +} + +#else +#define DEFAULT_24_AIO 1 /* Without HAVE_AIO_ABI both entry points are stubs that return -ENOSYS. */ +static int init_aio_26(void) +{ + return -ENOSYS; +} + +static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) +{ + return -ENOSYS; +} +#endif + +/* Initialized in an initcall and unchanged thereafter */ +static int aio_24 = DEFAULT_24_AIO; + /* set_aio_24: handler for aio=2.4; forces the fallback path by setting aio_24. */ +static int __init set_aio_24(char *name, int *add) +{ + aio_24 = 1; + return 0; +} + +__uml_setup("aio=2.4", set_aio_24, +"aio=2.4\n" +" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n" +" available. 2.4 AIO is a single thread that handles one request at a\n" +" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n" +" interface to handle an arbitrary number of pending requests. 2.6 AIO \n" +" is not available in tt mode, on 2.4 hosts, or when UML is built with\n" +" /usr/include/linux/aio_abi.h not available.
Many distributions don't\n" +" include aio_abi.h, so you will need to copy it from a kernel tree to\n" +" your /usr/include/linux in order to build an AIO-capable UML\n\n" +); + +static int init_aio(void) +{ + int err; + + if (!aio_24) { + err = init_aio_26(); + if (err && (errno == ENOSYS)) { + printk(UM_KERN_INFO "2.6 AIO not supported on the " + "host - reverting to 2.4 AIO\n"); + aio_24 = 1; + } + else return err; + } + + if (aio_24) + return init_aio_24(); + + return 0; +} + +/* + * The reason for the __initcall/__uml_exitcall asymmetry is that init_aio + * needs to be called when the kernel is running because it calls run_helper, + * which needs get_free_page. exit_aio is a __uml_exitcall because the generic + * kernel does not run __exitcalls on shutdown, and can't because many of them + * break when called outside of module unloading. + */ +__initcall(init_aio); + +static void exit_aio(void) +{ + if (aio_pid != -1) { + os_kill_process(aio_pid, 1); + free_stack(aio_stack, 0); + } +} + +__uml_exitcall(exit_aio); + +static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) +{ + struct aio_thread_req req = { .type = type, + .io_fd = io_fd, + .offset = offset, + .buf = buf, + .len = len, + .aio = aio, + }; + int err; + + err = write(aio_req_fd_w, &req, sizeof(req)); + if (err == sizeof(req)) + err = 0; + else err = -errno; + + return err; +} + +int submit_aio(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, int reply_fd, + struct aio_context *aio) +{ + aio->reply_fd = reply_fd; + if (aio_24) + return submit_aio_24(type, io_fd, buf, len, offset, aio); + else + return submit_aio_26(type, io_fd, buf, len, offset, aio); +} diff --git a/arch/um/os-Linux/drivers/Makefile b/arch/um/os-Linux/drivers/Makefile new file mode 100644 index 0000000..6c546dc --- /dev/null +++ b/arch/um/os-Linux/drivers/Makefile @@ -0,0 +1,13 @@ +# +# Copyright (C) 2000, 2002 Jeff Dike 
(jdike@karaya.com) +# Licensed under the GPL +# + +ethertap-objs := ethertap_kern.o ethertap_user.o +tuntap-objs := tuntap_kern.o tuntap_user.o + +obj-y = +obj-$(CONFIG_UML_NET_ETHERTAP) += ethertap.o +obj-$(CONFIG_UML_NET_TUNTAP) += tuntap.o + +include arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/drivers/etap.h b/arch/um/os-Linux/drivers/etap.h new file mode 100644 index 0000000..ddffd41 --- /dev/null +++ b/arch/um/os-Linux/drivers/etap.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __DRIVERS_ETAP_H +#define __DRIVERS_ETAP_H + +#include "net_user.h" + +struct ethertap_data { + char *dev_name; + char *gate_addr; + int data_fd; + int control_fd; + void *dev; +}; + +extern const struct net_user_info ethertap_user_info; + +#endif diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c new file mode 100644 index 0000000..046a131 --- /dev/null +++ b/arch/um/os-Linux/drivers/ethertap_kern.c @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 by various other people who didn't put their name here. + * Licensed under the GPL. 
+ */ + +#include <linux/init.h> +#include <linux/netdevice.h> +#include "etap.h" +#include "net_kern.h" + +struct ethertap_init { + char *dev_name; + char *gate_addr; +}; + +static void etap_init(struct net_device *dev, void *data) +{ + struct uml_net_private *pri; + struct ethertap_data *epri; + struct ethertap_init *init = data; + + pri = dev->priv; + epri = (struct ethertap_data *) pri->user; + epri->dev_name = init->dev_name; + epri->gate_addr = init->gate_addr; + epri->data_fd = -1; + epri->control_fd = -1; + epri->dev = dev; + + printk(KERN_INFO "ethertap backend - %s", epri->dev_name); + if (epri->gate_addr != NULL) + printk(KERN_CONT ", IP = %s", epri->gate_addr); + printk(KERN_CONT "\n"); +} + +static int etap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + int len; + + len = net_recvfrom(fd, skb_mac_header(skb), + skb->dev->mtu + 2 + ETH_HEADER_ETHERTAP); + if (len <= 0) + return(len); + + skb_pull(skb, 2); + len -= 2; + return len; +} + +static int etap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + skb_push(skb, 2); + return net_send(fd, skb->data, skb->len); +} + +const struct net_kern_info ethertap_kern_info = { + .init = etap_init, + .protocol = eth_protocol, + .read = etap_read, + .write = etap_write, +}; + +int ethertap_setup(char *str, char **mac_out, void *data) +{ + struct ethertap_init *init = data; + + *init = ((struct ethertap_init) + { .dev_name = NULL, + .gate_addr = NULL }); + if (tap_setup_common(str, "ethertap", &init->dev_name, mac_out, + &init->gate_addr)) + return 0; + if (init->dev_name == NULL) { + printk(KERN_ERR "ethertap_setup : Missing tap device name\n"); + return 0; + } + + return 1; +} + +static struct transport ethertap_transport = { + .list = LIST_HEAD_INIT(ethertap_transport.list), + .name = "ethertap", + .setup = ethertap_setup, + .user = ðertap_user_info, + .kern = ðertap_kern_info, + .private_size = sizeof(struct ethertap_data), + .setup_size = sizeof(struct ethertap_init), +}; + 
+static int register_ethertap(void) +{ + register_transport(ðertap_transport); + return 0; +} + +late_initcall(register_ethertap); diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c new file mode 100644 index 0000000..cc72cb2 --- /dev/null +++ b/arch/um/os-Linux/drivers/ethertap_user.c @@ -0,0 +1,250 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 by various other people who didn't put their name here. + * Licensed under the GPL. + */ + +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include "etap.h" +#include "kern_constants.h" +#include "os.h" +#include "net_user.h" +#include "um_malloc.h" +#include "user.h" + +#define MAX_PACKET ETH_MAX_PACKET + +static int etap_user_init(void *data, void *dev) +{ + struct ethertap_data *pri = data; + + pri->dev = dev; + return 0; +} + +struct addr_change { + enum { ADD_ADDR, DEL_ADDR } what; + unsigned char addr[4]; + unsigned char netmask[4]; +}; + +static void etap_change(int op, unsigned char *addr, unsigned char *netmask, + int fd) +{ + struct addr_change change; + char *output; + int n; + + change.what = op; + memcpy(change.addr, addr, sizeof(change.addr)); + memcpy(change.netmask, netmask, sizeof(change.netmask)); + CATCH_EINTR(n = write(fd, &change, sizeof(change))); + if (n != sizeof(change)) { + printk(UM_KERN_ERR "etap_change - request failed, err = %d\n", + errno); + return; + } + + output = uml_kmalloc(UM_KERN_PAGE_SIZE, UM_GFP_KERNEL); + if (output == NULL) + printk(UM_KERN_ERR "etap_change : Failed to allocate output " + "buffer\n"); + read_output(fd, output, UM_KERN_PAGE_SIZE); + if (output != NULL) { + printk("%s", output); + kfree(output); + } +} + +static void etap_open_addr(unsigned char *addr, unsigned char *netmask, + void 
*arg) +{ + etap_change(ADD_ADDR, addr, netmask, *((int *) arg)); +} + +static void etap_close_addr(unsigned char *addr, unsigned char *netmask, + void *arg) +{ + etap_change(DEL_ADDR, addr, netmask, *((int *) arg)); +} + +struct etap_pre_exec_data { + int control_remote; + int control_me; + int data_me; +}; + +static void etap_pre_exec(void *arg) +{ + struct etap_pre_exec_data *data = arg; + + dup2(data->control_remote, 1); + close(data->data_me); + close(data->control_me); +} + +static int etap_tramp(char *dev, char *gate, int control_me, + int control_remote, int data_me, int data_remote) +{ + struct etap_pre_exec_data pe_data; + int pid, err, n; + char version_buf[sizeof("nnnnn\0")]; + char data_fd_buf[sizeof("nnnnnn\0")]; + char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; + char *setup_args[] = { "uml_net", version_buf, "ethertap", dev, + data_fd_buf, gate_buf, NULL }; + char *nosetup_args[] = { "uml_net", version_buf, "ethertap", + dev, data_fd_buf, NULL }; + char **args, c; + + sprintf(data_fd_buf, "%d", data_remote); + sprintf(version_buf, "%d", UML_NET_VERSION); + if (gate != NULL) { + strcpy(gate_buf, gate); + args = setup_args; + } + else args = nosetup_args; + + err = 0; + pe_data.control_remote = control_remote; + pe_data.control_me = control_me; + pe_data.data_me = data_me; + pid = run_helper(etap_pre_exec, &pe_data, args); + + if (pid < 0) + err = pid; + close(data_remote); + close(control_remote); + CATCH_EINTR(n = read(control_me, &c, sizeof(c))); + if (n != sizeof(c)) { + err = -errno; + printk(UM_KERN_ERR "etap_tramp : read of status failed, " + "err = %d\n", -err); + return err; + } + if (c != 1) { + printk(UM_KERN_ERR "etap_tramp : uml_net failed\n"); + err = helper_wait(pid); + } + return err; +} + +static int etap_open(void *data) +{ + struct ethertap_data *pri = data; + char *output; + int data_fds[2], control_fds[2], err, output_len; + + err = tap_open_common(pri->dev, pri->gate_addr); + if (err) + return err; + + err = socketpair(AF_UNIX, 
SOCK_DGRAM, 0, data_fds); + if (err) { + err = -errno; + printk(UM_KERN_ERR "etap_open - data socketpair failed - " + "err = %d\n", errno); + return err; + } + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, control_fds); + if (err) { + err = -errno; + printk(UM_KERN_ERR "etap_open - control socketpair failed - " + "err = %d\n", errno); + goto out_close_data; + } + + err = etap_tramp(pri->dev_name, pri->gate_addr, control_fds[0], + control_fds[1], data_fds[0], data_fds[1]); + output_len = UM_KERN_PAGE_SIZE; + output = uml_kmalloc(output_len, UM_GFP_KERNEL); + read_output(control_fds[0], output, output_len); + + if (output == NULL) + printk(UM_KERN_ERR "etap_open : failed to allocate output " + "buffer\n"); + else { + printk("%s", output); + kfree(output); + } + + if (err < 0) { + printk(UM_KERN_ERR "etap_tramp failed - err = %d\n", -err); + goto out_close_control; + } + + pri->data_fd = data_fds[0]; + pri->control_fd = control_fds[0]; + iter_addresses(pri->dev, etap_open_addr, &pri->control_fd); + return data_fds[0]; + +out_close_control: + close(control_fds[0]); + close(control_fds[1]); +out_close_data: + close(data_fds[0]); + close(data_fds[1]); + return err; +} + +static void etap_close(int fd, void *data) +{ + struct ethertap_data *pri = data; + + iter_addresses(pri->dev, etap_close_addr, &pri->control_fd); + close(fd); + + if (shutdown(pri->data_fd, SHUT_RDWR) < 0) + printk(UM_KERN_ERR "etap_close - shutdown data socket failed, " + "errno = %d\n", errno); + + if (shutdown(pri->control_fd, SHUT_RDWR) < 0) + printk(UM_KERN_ERR "etap_close - shutdown control socket " + "failed, errno = %d\n", errno); + + close(pri->data_fd); + pri->data_fd = -1; + close(pri->control_fd); + pri->control_fd = -1; +} + +static void etap_add_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct ethertap_data *pri = data; + + tap_check_ips(pri->gate_addr, addr); + if (pri->control_fd == -1) + return; + etap_open_addr(addr, netmask, &pri->control_fd); +} + +static 
void etap_del_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct ethertap_data *pri = data; + + if (pri->control_fd == -1) + return; + + etap_close_addr(addr, netmask, &pri->control_fd); +} + +const struct net_user_info ethertap_user_info = { + .init = etap_user_init, + .open = etap_open, + .close = etap_close, + .remove = NULL, + .add_address = etap_add_addr, + .delete_address = etap_del_addr, + .mtu = ETH_MAX_PACKET, + .max_packet = ETH_MAX_PACKET + ETH_HEADER_ETHERTAP, +}; diff --git a/arch/um/os-Linux/drivers/tuntap.h b/arch/um/os-Linux/drivers/tuntap.h new file mode 100644 index 0000000..f17c315 --- /dev/null +++ b/arch/um/os-Linux/drivers/tuntap.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __UM_TUNTAP_H +#define __UM_TUNTAP_H + +#include "net_user.h" + +struct tuntap_data { + char *dev_name; + int fixed_config; + char *gate_addr; + int fd; + void *dev; +}; + +extern const struct net_user_info tuntap_user_info; + +#endif diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c new file mode 100644 index 0000000..6b9e33d --- /dev/null +++ b/arch/um/os-Linux/drivers/tuntap_kern.c @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/netdevice.h> +#include <linux/init.h> +#include <linux/skbuff.h> +#include <asm/errno.h> +#include "net_kern.h" +#include "tuntap.h" + +struct tuntap_init { + char *dev_name; + char *gate_addr; +}; + +static void tuntap_init(struct net_device *dev, void *data) +{ + struct uml_net_private *pri; + struct tuntap_data *tpri; + struct tuntap_init *init = data; + + pri = dev->priv; + tpri = (struct tuntap_data *) pri->user; + tpri->dev_name = init->dev_name; + tpri->fixed_config = (init->dev_name != NULL); + tpri->gate_addr = init->gate_addr; + tpri->fd = -1; + tpri->dev = dev; + + 
printk(KERN_INFO "TUN/TAP backend - "); + if (tpri->gate_addr != NULL) + printk(KERN_CONT "IP = %s", tpri->gate_addr); + printk(KERN_CONT "\n"); +} + +static int tuntap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return net_read(fd, skb_mac_header(skb), + skb->dev->mtu + ETH_HEADER_OTHER); +} + +static int tuntap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) +{ + return net_write(fd, skb->data, skb->len); +} + +const struct net_kern_info tuntap_kern_info = { + .init = tuntap_init, + .protocol = eth_protocol, + .read = tuntap_read, + .write = tuntap_write, +}; + +int tuntap_setup(char *str, char **mac_out, void *data) +{ + struct tuntap_init *init = data; + + *init = ((struct tuntap_init) + { .dev_name = NULL, + .gate_addr = NULL }); + if (tap_setup_common(str, "tuntap", &init->dev_name, mac_out, + &init->gate_addr)) + return 0; + + return 1; +} + +static struct transport tuntap_transport = { + .list = LIST_HEAD_INIT(tuntap_transport.list), + .name = "tuntap", + .setup = tuntap_setup, + .user = &tuntap_user_info, + .kern = &tuntap_kern_info, + .private_size = sizeof(struct tuntap_data), + .setup_size = sizeof(struct tuntap_init), +}; + +static int register_tuntap(void) +{ + register_transport(&tuntap_transport); + return 0; +} + +late_initcall(register_tuntap); diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c new file mode 100644 index 0000000..2448be0 --- /dev/null +++ b/arch/um/os-Linux/drivers/tuntap_user.c @@ -0,0 +1,217 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <linux/if_tun.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include <sys/uio.h> +#include "kern_constants.h" +#include "kern_util.h" +#include "os.h" +#include "tuntap.h" +#include "user.h" + +static int 
tuntap_user_init(void *data, void *dev) +{ + struct tuntap_data *pri = data; + + pri->dev = dev; + return 0; +} + +static void tuntap_add_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct tuntap_data *pri = data; + + tap_check_ips(pri->gate_addr, addr); + if ((pri->fd == -1) || pri->fixed_config) + return; + open_addr(addr, netmask, pri->dev_name); +} + +static void tuntap_del_addr(unsigned char *addr, unsigned char *netmask, + void *data) +{ + struct tuntap_data *pri = data; + + if ((pri->fd == -1) || pri->fixed_config) + return; + close_addr(addr, netmask, pri->dev_name); +} + +struct tuntap_pre_exec_data { + int stdout; + int close_me; +}; + +static void tuntap_pre_exec(void *arg) +{ + struct tuntap_pre_exec_data *data = arg; + + dup2(data->stdout, 1); + close(data->close_me); +} + +static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote, + char *buffer, int buffer_len, int *used_out) +{ + struct tuntap_pre_exec_data data; + char version_buf[sizeof("nnnnn\0")]; + char *argv[] = { "uml_net", version_buf, "tuntap", "up", gate, + NULL }; + char buf[CMSG_SPACE(sizeof(*fd_out))]; + struct msghdr msg; + struct cmsghdr *cmsg; + struct iovec iov; + int pid, n, err; + + sprintf(version_buf, "%d", UML_NET_VERSION); + + data.stdout = remote; + data.close_me = me; + + pid = run_helper(tuntap_pre_exec, &data, argv); + + if (pid < 0) + return -pid; + + close(remote); + + msg.msg_name = NULL; + msg.msg_namelen = 0; + if (buffer != NULL) { + iov = ((struct iovec) { buffer, buffer_len }); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + } + else { + msg.msg_iov = NULL; + msg.msg_iovlen = 0; + } + msg.msg_control = buf; + msg.msg_controllen = sizeof(buf); + msg.msg_flags = 0; + n = recvmsg(me, &msg, 0); + *used_out = n; + if (n < 0) { + err = -errno; + printk(UM_KERN_ERR "tuntap_open_tramp : recvmsg failed - " + "errno = %d\n", errno); + return err; + } + helper_wait(pid); + + cmsg = CMSG_FIRSTHDR(&msg); + if (cmsg == NULL) { + 
printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a " + "message\n"); + return -EINVAL; + } + if ((cmsg->cmsg_level != SOL_SOCKET) || + (cmsg->cmsg_type != SCM_RIGHTS)) { + printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a " + "descriptor\n"); + return -EINVAL; + } + *fd_out = ((int *) CMSG_DATA(cmsg))[0]; + os_set_exec_close(*fd_out); + return 0; +} + +static int tuntap_open(void *data) +{ + struct ifreq ifr; + struct tuntap_data *pri = data; + char *output, *buffer; + int err, fds[2], len, used; + + err = tap_open_common(pri->dev, pri->gate_addr); + if (err < 0) + return err; + + if (pri->fixed_config) { + pri->fd = os_open_file("/dev/net/tun", + of_cloexec(of_rdwr(OPENFLAGS())), 0); + if (pri->fd < 0) { + printk(UM_KERN_ERR "Failed to open /dev/net/tun, " + "err = %d\n", -pri->fd); + return pri->fd; + } + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_flags = IFF_TAP | IFF_NO_PI; + strlcpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name)); + if (ioctl(pri->fd, TUNSETIFF, &ifr) < 0) { + err = -errno; + printk(UM_KERN_ERR "TUNSETIFF failed, errno = %d\n", + errno); + close(pri->fd); + return err; + } + } + else { + err = socketpair(AF_UNIX, SOCK_DGRAM, 0, fds); + if (err) { + err = -errno; + printk(UM_KERN_ERR "tuntap_open : socketpair failed - " + "errno = %d\n", errno); + return err; + } + + buffer = get_output_buffer(&len); + if (buffer != NULL) + len--; + used = 0; + + err = tuntap_open_tramp(pri->gate_addr, &pri->fd, fds[0], + fds[1], buffer, len, &used); + + output = buffer; + if (err < 0) { + printk("%s", output); + free_output_buffer(buffer); + printk(UM_KERN_ERR "tuntap_open_tramp failed - " + "err = %d\n", -err); + return err; + } + + pri->dev_name = uml_strdup(buffer); + output += IFNAMSIZ; + printk("%s", output); + free_output_buffer(buffer); + + close(fds[0]); + iter_addresses(pri->dev, open_addr, pri->dev_name); + } + + return pri->fd; +} + +static void tuntap_close(int fd, void *data) +{ + struct tuntap_data *pri = data; + + if 
(!pri->fixed_config) + iter_addresses(pri->dev, close_addr, pri->dev_name); + close(fd); + pri->fd = -1; +} + +const struct net_user_info tuntap_user_info = { + .init = tuntap_user_init, + .open = tuntap_open, + .close = tuntap_close, + .remove = NULL, + .add_address = tuntap_add_addr, + .delete_address = tuntap_del_addr, + .mtu = ETH_MAX_PACKET, + .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, +}; diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c new file mode 100644 index 0000000..608784d --- /dev/null +++ b/arch/um/os-Linux/elf_aux.c @@ -0,0 +1,79 @@ +/* + * arch/um/kernel/elf_aux.c + * + * Scan the Elf auxiliary vector provided by the host to extract + * information about vsyscall-page, etc. + * + * Copyright (C) 2004 Fujitsu Siemens Computers GmbH + * Author: Bodo Stroesser (bodo.stroesser@fujitsu-siemens.com) + */ +#include <elf.h> +#include <stddef.h> +#include "init.h" +#include "elf_user.h" +#include "mem_user.h" +#include <kern_constants.h> + +/* Use the one from the kernel - the host may miss it, if having old headers. 
*/ +#if UM_ELF_CLASS == UM_ELFCLASS32 +typedef Elf32_auxv_t elf_auxv_t; +#else +typedef Elf64_auxv_t elf_auxv_t; +#endif + +/* These are initialized very early in boot and never changed */ +char * elf_aux_platform; +long elf_aux_hwcap; +unsigned long vsyscall_ehdr; +unsigned long vsyscall_end; +unsigned long __kernel_vsyscall; + +__init void scan_elf_aux( char **envp) +{ + long page_size = 0; + elf_auxv_t * auxv; + + while ( *envp++ != NULL) ; + + for ( auxv = (elf_auxv_t *)envp; auxv->a_type != AT_NULL; auxv++) { + switch ( auxv->a_type ) { + case AT_SYSINFO: + __kernel_vsyscall = auxv->a_un.a_val; + /* See if the page is under TASK_SIZE */ + if (__kernel_vsyscall < (unsigned long) envp) + __kernel_vsyscall = 0; + break; + case AT_SYSINFO_EHDR: + vsyscall_ehdr = auxv->a_un.a_val; + /* See if the page is under TASK_SIZE */ + if (vsyscall_ehdr < (unsigned long) envp) + vsyscall_ehdr = 0; + break; + case AT_HWCAP: + elf_aux_hwcap = auxv->a_un.a_val; + break; + case AT_PLATFORM: + /* elf.h removed the pointer elements from + * a_un, so we have to use a_val, which is + * all that's left. + */ + elf_aux_platform = + (char *) (long) auxv->a_un.a_val; + break; + case AT_PAGESZ: + page_size = auxv->a_un.a_val; + break; + } + } + if ( ! __kernel_vsyscall || ! vsyscall_ehdr || + ! elf_aux_hwcap || ! elf_aux_platform || + ! page_size || (vsyscall_ehdr % page_size) ) { + __kernel_vsyscall = 0; + vsyscall_ehdr = 0; + elf_aux_hwcap = 0; + elf_aux_platform = "i586"; + } + else { + vsyscall_end = vsyscall_ehdr + page_size; + } +} diff --git a/arch/um/os-Linux/execvp.c b/arch/um/os-Linux/execvp.c new file mode 100644 index 0000000..66e583a --- /dev/null +++ b/arch/um/os-Linux/execvp.c @@ -0,0 +1,149 @@ +/* Copyright (C) 2006 by Paolo Giarrusso - modified from glibc' execvp.c. + Original copyright notice follows: + + Copyright (C) 1991,92,1995-99,2002,2004 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ +#include <unistd.h> + +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <limits.h> + +#ifndef TEST +#include "um_malloc.h" +#else +#include <stdio.h> +#define um_kmalloc malloc +#endif +#include "os.h" + +/* Execute FILE, searching in the `PATH' environment variable if it contains + no slashes, with arguments ARGV and environment from `environ'. */ +int execvp_noalloc(char *buf, const char *file, char *const argv[]) +{ + if (*file == '\0') { + return -ENOENT; + } + + if (strchr (file, '/') != NULL) { + /* Don't search when it contains a slash. */ + execv(file, argv); + } else { + int got_eacces; + size_t len, pathlen; + char *name, *p; + char *path = getenv("PATH"); + if (path == NULL) + path = ":/bin:/usr/bin"; + + len = strlen(file) + 1; + pathlen = strlen(path); + /* Copy the file name at the top. */ + name = memcpy(buf + pathlen + 1, file, len); + /* And add the slash. */ + *--name = '/'; + + got_eacces = 0; + p = path; + do { + char *startp; + + path = p; + //Let's avoid this GNU extension. 
+ //p = strchrnul (path, ':'); + p = strchr(path, ':'); + if (!p) + p = strchr(path, '\0'); + + if (p == path) + /* Two adjacent colons, or a colon at the beginning or the end + of `PATH' means to search the current directory. */ + startp = name + 1; + else + startp = memcpy(name - (p - path), path, p - path); + + /* Try to execute this name. If it works, execv will not return. */ + execv(startp, argv); + + /* + if (errno == ENOEXEC) { + } + */ + + switch (errno) { + case EACCES: + /* Record the we got a `Permission denied' error. If we end + up finding no executable we can use, we want to diagnose + that we did find one but were denied access. */ + got_eacces = 1; + case ENOENT: + case ESTALE: + case ENOTDIR: + /* Those errors indicate the file is missing or not executable + by us, in which case we want to just try the next path + directory. */ + case ENODEV: + case ETIMEDOUT: + /* Some strange filesystems like AFS return even + stranger error numbers. They cannot reasonably mean + anything else so ignore those, too. */ + case ENOEXEC: + /* We won't go searching for the shell + * if it is not executable - the Linux + * kernel already handles this enough, + * for us. */ + break; + + default: + /* Some other error means we found an executable file, but + something went wrong executing it; return the error to our + caller. */ + return -errno; + } + } while (*p++ != '\0'); + + /* We tried every element and none of them worked. */ + if (got_eacces) + /* At least one failure was due to permissions, so report that + error. */ + return -EACCES; + } + + /* Return the error from the last attempt (probably ENOENT). 
*/ + return -errno; +} +#ifdef TEST +int main(int argc, char**argv) +{ + char buf[PATH_MAX]; + int ret; + argc--; + if (!argc) { + fprintf(stderr, "Not enough arguments\n"); + return 1; + } + argv++; + if (ret = execvp_noalloc(buf, argv[0], argv)) { + errno = -ret; + perror("execvp_noalloc"); + } + return 0; +} +#endif diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c new file mode 100644 index 0000000..b5afcfd --- /dev/null +++ b/arch/um/os-Linux/file.c @@ -0,0 +1,563 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/un.h> +#include "kern_constants.h" +#include "os.h" +#include "user.h" + +static void copy_stat(struct uml_stat *dst, const struct stat64 *src) +{ + *dst = ((struct uml_stat) { + .ust_dev = src->st_dev, /* device */ + .ust_ino = src->st_ino, /* inode */ + .ust_mode = src->st_mode, /* protection */ + .ust_nlink = src->st_nlink, /* number of hard links */ + .ust_uid = src->st_uid, /* user ID of owner */ + .ust_gid = src->st_gid, /* group ID of owner */ + .ust_size = src->st_size, /* total size, in bytes */ + .ust_blksize = src->st_blksize, /* blocksize for filesys I/O */ + .ust_blocks = src->st_blocks, /* number of blocks allocated */ + .ust_atime = src->st_atime, /* time of last access */ + .ust_mtime = src->st_mtime, /* time of last modification */ + .ust_ctime = src->st_ctime, /* time of last change */ + }); +} + +int os_stat_fd(const int fd, struct uml_stat *ubuf) +{ + struct stat64 sbuf; + int err; + + CATCH_EINTR(err = fstat64(fd, &sbuf)); + if (err < 0) + return -errno; + + if (ubuf != NULL) + copy_stat(ubuf, &sbuf); + return err; +} + +int os_stat_file(const char *file_name, struct uml_stat *ubuf) +{ + struct stat64 sbuf; + int err; + + CATCH_EINTR(err = 
stat64(file_name, &sbuf)); + if (err < 0) + return -errno; + + if (ubuf != NULL) + copy_stat(ubuf, &sbuf); + return err; +} + +int os_access(const char *file, int mode) +{ + int amode, err; + + amode = (mode & OS_ACC_R_OK ? R_OK : 0) | + (mode & OS_ACC_W_OK ? W_OK : 0) | + (mode & OS_ACC_X_OK ? X_OK : 0) | + (mode & OS_ACC_F_OK ? F_OK : 0); + + err = access(file, amode); + if (err < 0) + return -errno; + + return 0; +} + +/* FIXME? required only by hostaudio (because it passes ioctls verbatim) */ +int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg) +{ + int err; + + err = ioctl(fd, cmd, arg); + if (err < 0) + return -errno; + + return err; +} + +/* FIXME: ensure namebuf in os_get_if_name is big enough */ +int os_get_ifname(int fd, char* namebuf) +{ + if (ioctl(fd, SIOCGIFNAME, namebuf) < 0) + return -errno; + + return 0; +} + +int os_set_slip(int fd) +{ + int disc, sencap; + + disc = N_SLIP; + if (ioctl(fd, TIOCSETD, &disc) < 0) + return -errno; + + sencap = 0; + if (ioctl(fd, SIOCSIFENCAP, &sencap) < 0) + return -errno; + + return 0; +} + +int os_mode_fd(int fd, int mode) +{ + int err; + + CATCH_EINTR(err = fchmod(fd, mode)); + if (err < 0) + return -errno; + + return 0; +} + +int os_file_type(char *file) +{ + struct uml_stat buf; + int err; + + err = os_stat_file(file, &buf); + if (err < 0) + return err; + + if (S_ISDIR(buf.ust_mode)) + return OS_TYPE_DIR; + else if (S_ISLNK(buf.ust_mode)) + return OS_TYPE_SYMLINK; + else if (S_ISCHR(buf.ust_mode)) + return OS_TYPE_CHARDEV; + else if (S_ISBLK(buf.ust_mode)) + return OS_TYPE_BLOCKDEV; + else if (S_ISFIFO(buf.ust_mode)) + return OS_TYPE_FIFO; + else if (S_ISSOCK(buf.ust_mode)) + return OS_TYPE_SOCK; + else return OS_TYPE_FILE; +} + +int os_file_mode(const char *file, struct openflags *mode_out) +{ + int err; + + *mode_out = OPENFLAGS(); + + err = access(file, W_OK); + if (err && (errno != EACCES)) + return -errno; + else if (!err) + *mode_out = of_write(*mode_out); + + err = access(file, R_OK); + if 
(err && (errno != EACCES)) + return -errno; + else if (!err) + *mode_out = of_read(*mode_out); + + return err; +} + +int os_open_file(const char *file, struct openflags flags, int mode) +{ + int fd, err, f = 0; + + if (flags.r && flags.w) + f = O_RDWR; + else if (flags.r) + f = O_RDONLY; + else if (flags.w) + f = O_WRONLY; + else f = 0; + + if (flags.s) + f |= O_SYNC; + if (flags.c) + f |= O_CREAT; + if (flags.t) + f |= O_TRUNC; + if (flags.e) + f |= O_EXCL; + if (flags.a) + f |= O_APPEND; + + fd = open64(file, f, mode); + if (fd < 0) + return -errno; + + if (flags.cl && fcntl(fd, F_SETFD, 1)) { + err = -errno; + close(fd); + return err; + } + + return fd; +} + +int os_connect_socket(const char *name) +{ + struct sockaddr_un sock; + int fd, err; + + sock.sun_family = AF_UNIX; + snprintf(sock.sun_path, sizeof(sock.sun_path), "%s", name); + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) { + err = -errno; + goto out; + } + + err = connect(fd, (struct sockaddr *) &sock, sizeof(sock)); + if (err) { + err = -errno; + goto out_close; + } + + return fd; + +out_close: + close(fd); +out: + return err; +} + +void os_close_file(int fd) +{ + close(fd); +} + +int os_seek_file(int fd, unsigned long long offset) +{ + unsigned long long actual; + + actual = lseek64(fd, offset, SEEK_SET); + if (actual != offset) + return -errno; + return 0; +} + +int os_read_file(int fd, void *buf, int len) +{ + int n = read(fd, buf, len); + + if (n < 0) + return -errno; + return n; +} + +int os_write_file(int fd, const void *buf, int len) +{ + int n = write(fd, (void *) buf, len); + + if (n < 0) + return -errno; + return n; +} + +int os_file_size(const char *file, unsigned long long *size_out) +{ + struct uml_stat buf; + int err; + + err = os_stat_file(file, &buf); + if (err < 0) { + printk(UM_KERN_ERR "Couldn't stat \"%s\" : err = %d\n", file, + -err); + return err; + } + + if (S_ISBLK(buf.ust_mode)) { + int fd; + long blocks; + + fd = open(file, O_RDONLY, 0); + if (fd < 0) { + err = 
-errno; + printk(UM_KERN_ERR "Couldn't open \"%s\", " + "errno = %d\n", file, errno); + return err; + } + if (ioctl(fd, BLKGETSIZE, &blocks) < 0) { + err = -errno; + printk(UM_KERN_ERR "Couldn't get the block size of " + "\"%s\", errno = %d\n", file, errno); + close(fd); + return err; + } + *size_out = ((long long) blocks) * 512; + close(fd); + } + else *size_out = buf.ust_size; + + return 0; +} + +int os_file_modtime(const char *file, unsigned long *modtime) +{ + struct uml_stat buf; + int err; + + err = os_stat_file(file, &buf); + if (err < 0) { + printk(UM_KERN_ERR "Couldn't stat \"%s\" : err = %d\n", file, + -err); + return err; + } + + *modtime = buf.ust_mtime; + return 0; +} + +int os_set_exec_close(int fd) +{ + int err; + + CATCH_EINTR(err = fcntl(fd, F_SETFD, FD_CLOEXEC)); + + if (err < 0) + return -errno; + return err; +} + +int os_pipe(int *fds, int stream, int close_on_exec) +{ + int err, type = stream ? SOCK_STREAM : SOCK_DGRAM; + + err = socketpair(AF_UNIX, type, 0, fds); + if (err < 0) + return -errno; + + if (!close_on_exec) + return 0; + + err = os_set_exec_close(fds[0]); + if (err < 0) + goto error; + + err = os_set_exec_close(fds[1]); + if (err < 0) + goto error; + + return 0; + + error: + printk(UM_KERN_ERR "os_pipe : Setting FD_CLOEXEC failed, err = %d\n", + -err); + close(fds[1]); + close(fds[0]); + return err; +} + +int os_set_fd_async(int fd) +{ + int err, flags; + + flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; + + flags |= O_ASYNC | O_NONBLOCK; + if (fcntl(fd, F_SETFL, flags) < 0) { + err = -errno; + printk(UM_KERN_ERR "os_set_fd_async : failed to set O_ASYNC " + "and O_NONBLOCK on fd # %d, errno = %d\n", fd, errno); + return err; + } + + if ((fcntl(fd, F_SETSIG, SIGIO) < 0) || + (fcntl(fd, F_SETOWN, os_getpid()) < 0)) { + err = -errno; + printk(UM_KERN_ERR "os_set_fd_async : Failed to fcntl F_SETOWN " + "(or F_SETSIG) fd %d, errno = %d\n", fd, errno); + return err; + } + + return 0; +} + +int os_clear_fd_async(int fd) +{ + 
int flags; + + flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; + + flags &= ~(O_ASYNC | O_NONBLOCK); + if (fcntl(fd, F_SETFL, flags) < 0) + return -errno; + return 0; +} + +int os_set_fd_block(int fd, int blocking) +{ + int flags; + + flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; + + if (blocking) + flags &= ~O_NONBLOCK; + else + flags |= O_NONBLOCK; + + if (fcntl(fd, F_SETFL, flags) < 0) + return -errno; + + return 0; +} + +int os_accept_connection(int fd) +{ + int new; + + new = accept(fd, NULL, 0); + if (new < 0) + return -errno; + return new; +} + +#ifndef SHUT_RD +#define SHUT_RD 0 +#endif + +#ifndef SHUT_WR +#define SHUT_WR 1 +#endif + +#ifndef SHUT_RDWR +#define SHUT_RDWR 2 +#endif + +int os_shutdown_socket(int fd, int r, int w) +{ + int what, err; + + if (r && w) + what = SHUT_RDWR; + else if (r) + what = SHUT_RD; + else if (w) + what = SHUT_WR; + else + return -EINVAL; + + err = shutdown(fd, what); + if (err < 0) + return -errno; + return 0; +} + +int os_rcv_fd(int fd, int *helper_pid_out) +{ + int new, n; + char buf[CMSG_SPACE(sizeof(new))]; + struct msghdr msg; + struct cmsghdr *cmsg; + struct iovec iov; + + msg.msg_name = NULL; + msg.msg_namelen = 0; + iov = ((struct iovec) { .iov_base = helper_pid_out, + .iov_len = sizeof(*helper_pid_out) }); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = buf; + msg.msg_controllen = sizeof(buf); + msg.msg_flags = 0; + + n = recvmsg(fd, &msg, 0); + if (n < 0) + return -errno; + else if (n != iov.iov_len) + *helper_pid_out = -1; + + cmsg = CMSG_FIRSTHDR(&msg); + if (cmsg == NULL) { + printk(UM_KERN_ERR "rcv_fd didn't receive anything, " + "error = %d\n", errno); + return -1; + } + if ((cmsg->cmsg_level != SOL_SOCKET) || + (cmsg->cmsg_type != SCM_RIGHTS)) { + printk(UM_KERN_ERR "rcv_fd didn't receive a descriptor\n"); + return -1; + } + + new = ((int *) CMSG_DATA(cmsg))[0]; + return new; +} + +int os_create_unix_socket(const char *file, int len, int close_on_exec) +{ + struct 
sockaddr_un addr; + int sock, err; + + sock = socket(PF_UNIX, SOCK_DGRAM, 0); + if (sock < 0) + return -errno; + + if (close_on_exec) { + err = os_set_exec_close(sock); + if (err < 0) + printk(UM_KERN_ERR "create_unix_socket : " + "close_on_exec failed, err = %d", -err); + } + + addr.sun_family = AF_UNIX; + + snprintf(addr.sun_path, len, "%s", file); + + err = bind(sock, (struct sockaddr *) &addr, sizeof(addr)); + if (err < 0) + return -errno; + + return sock; +} + +void os_flush_stdout(void) +{ + fflush(stdout); +} + +int os_lock_file(int fd, int excl) +{ + int type = excl ? F_WRLCK : F_RDLCK; + struct flock lock = ((struct flock) { .l_type = type, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0 } ); + int err, save; + + err = fcntl(fd, F_SETLK, &lock); + if (!err) + goto out; + + save = -errno; + err = fcntl(fd, F_GETLK, &lock); + if (err) { + err = -errno; + goto out; + } + + printk(UM_KERN_ERR "F_SETLK failed, file already locked by pid %d\n", + lock.l_pid); + err = save; + out: + return err; +} diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c new file mode 100644 index 0000000..30860b8 --- /dev/null +++ b/arch/um/os-Linux/helper.c @@ -0,0 +1,167 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <sched.h> +#include <linux/limits.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include "kern_constants.h" +#include "kern_util.h" +#include "os.h" +#include "um_malloc.h" +#include "user.h" +#include <linux/limits.h> + +struct helper_data { + void (*pre_exec)(void*); + void *pre_data; + char **argv; + int fd; + char *buf; +}; + +static int helper_child(void *arg) +{ + struct helper_data *data = arg; + char **argv = data->argv; + int err; + + if (data->pre_exec != NULL) + (*data->pre_exec)(data->pre_data); + err = execvp_noalloc(data->buf, argv[0], argv); + + /* If the exec succeeds, we don't get here */ + 
write(data->fd, &err, sizeof(err)); + + return 0; +} + +/* Returns either the pid of the child process we run or -E* on failure. */ +int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv) +{ + struct helper_data data; + unsigned long stack, sp; + int pid, fds[2], ret, n; + + stack = alloc_stack(0, __cant_sleep()); + if (stack == 0) + return -ENOMEM; + + ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds); + if (ret < 0) { + ret = -errno; + printk(UM_KERN_ERR "run_helper : pipe failed, errno = %d\n", + errno); + goto out_free; + } + + ret = os_set_exec_close(fds[1]); + if (ret < 0) { + printk(UM_KERN_ERR "run_helper : setting FD_CLOEXEC failed, " + "ret = %d\n", -ret); + goto out_close; + } + + sp = stack + UM_KERN_PAGE_SIZE - sizeof(void *); + data.pre_exec = pre_exec; + data.pre_data = pre_data; + data.argv = argv; + data.fd = fds[1]; + data.buf = __cant_sleep() ? uml_kmalloc(PATH_MAX, UM_GFP_ATOMIC) : + uml_kmalloc(PATH_MAX, UM_GFP_KERNEL); + pid = clone(helper_child, (void *) sp, CLONE_VM, &data); + if (pid < 0) { + ret = -errno; + printk(UM_KERN_ERR "run_helper : clone failed, errno = %d\n", + errno); + goto out_free2; + } + + close(fds[1]); + fds[1] = -1; + + /* + * Read the errno value from the child, if the exec failed, or get 0 if + * the exec succeeded because the pipe fd was set as close-on-exec. 
+ */ + n = read(fds[0], &ret, sizeof(ret)); + if (n == 0) { + ret = pid; + } else { + if (n < 0) { + n = -errno; + printk(UM_KERN_ERR "run_helper : read on pipe failed, " + "ret = %d\n", -n); + ret = n; + } + CATCH_EINTR(waitpid(pid, NULL, __WCLONE)); + } + +out_free2: + kfree(data.buf); +out_close: + if (fds[1] != -1) + close(fds[1]); + close(fds[0]); +out_free: + free_stack(stack, 0); + return ret; +} + +int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, + unsigned long *stack_out) +{ + unsigned long stack, sp; + int pid, status, err; + + stack = alloc_stack(0, __cant_sleep()); + if (stack == 0) + return -ENOMEM; + + sp = stack + UM_KERN_PAGE_SIZE - sizeof(void *); + pid = clone(proc, (void *) sp, flags, arg); + if (pid < 0) { + err = -errno; + printk(UM_KERN_ERR "run_helper_thread : clone failed, " + "errno = %d\n", errno); + return err; + } + if (stack_out == NULL) { + CATCH_EINTR(pid = waitpid(pid, &status, __WCLONE)); + if (pid < 0) { + err = -errno; + printk(UM_KERN_ERR "run_helper_thread - wait failed, " + "errno = %d\n", errno); + pid = err; + } + if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) + printk(UM_KERN_ERR "run_helper_thread - thread " + "returned status 0x%x\n", status); + free_stack(stack, 0); + } else + *stack_out = stack; + return pid; +} + +int helper_wait(int pid) +{ + int ret, status; + int wflags = __WCLONE; + + CATCH_EINTR(ret = waitpid(pid, &status, wflags)); + if (ret < 0) { + printk(UM_KERN_ERR "helper_wait : waitpid process %d failed, " + "errno = %d\n", pid, errno); + return -errno; + } else if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + printk(UM_KERN_ERR "helper_wait : process %d exited with " + "status 0x%x\n", pid, status); + return -ECHILD; + } else + return 0; +} diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c new file mode 100644 index 0000000..0348b97 --- /dev/null +++ b/arch/um/os-Linux/irq.c @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike 
/*
 * Append an entry for fd to the file-scope pollfds array.
 *
 * fd          - descriptor to store in the new entry
 * events      - poll event mask to store in the new entry
 * tmp_pfd     - caller-supplied buffer that becomes the new pollfds array
 *               if the current one is full
 * size_tmpfds - size of tmp_pfd in bytes
 *
 * Returns 0 once the entry has been added.  If the array is full and
 * tmp_pfd is not larger than the current array, nothing is changed and
 * the number of bytes needed for one more entry is returned instead.
 *
 * tmp_pfd is either adopted as the new array or passed to kfree() when
 * the function returns 0; in the "too small" case it is left untouched.
 * NOTE(review): presumably the caller then retries with a larger
 * buffer - confirm against the caller.
 */
int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds)
{
	if (pollfds_num == pollfds_size) {
		/* Array is full: tmp_pfd must have room for one more entry */
		if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) {
			/* return min size needed for new pollfds area */
			return (pollfds_size + 1) * sizeof(pollfds[0]);
		}

		if (pollfds != NULL) {
			/* carry the existing entries over into the new buffer */
			memcpy(tmp_pfd, pollfds,
			       sizeof(pollfds[0]) * pollfds_size);
			/* remove old pollfds */
			kfree(pollfds);
		}
		/* tmp_pfd is now the live array, with one more slot */
		pollfds = tmp_pfd;
		pollfds_size++;
	} else
		kfree(tmp_pfd);	/* remove not used tmp_pfd */

	/* fill the first free slot */
	pollfds[pollfds_num] = ((struct pollfd) { .fd	  	= fd,
						  .events 	= events,
						  .revents 	= 0 });
	pollfds_num++;

	return 0;
}
#define PGD_BOUND (4 * 1024 * 1024)
#define STACKSIZE (8 * 1024 * 1024)
#define THREAD_NAME_LEN (256)

/*
 * Make sure the soft RLIMIT_STACK limit is finite and no larger than
 * STACKSIZE, lowering it to STACKSIZE when it is not.  A limit that is
 * already within bounds is left alone.  Exits with status 1 if the limit
 * cannot be read or changed.
 */
static void set_stklim(void)
{
	struct rlimit stack_lim;
	int within_bound;

	if (getrlimit(RLIMIT_STACK, &stack_lim) < 0) {
		perror("getrlimit");
		exit(1);
	}

	within_bound = (stack_lim.rlim_cur != RLIM_INFINITY) &&
		       (stack_lim.rlim_cur <= STACKSIZE);
	if (within_bound)
		return;

	/* Only the soft limit is lowered; rlim_max is passed back as read */
	stack_lim.rlim_cur = STACKSIZE;
	if (setrlimit(RLIMIT_STACK, &stack_lim) >= 0)
		return;

	perror("setrlimit");
	exit(1);
}
#define UML_LIB_PATH	":/usr/lib/uml"

/*
 * Make sure /usr/lib/uml is on PATH.
 *
 * With no PATH, or an empty one, a default PATH ending in /usr/lib/uml
 * is installed.  Otherwise /usr/lib/uml is appended to the existing
 * value.  Failures are reported with perror() and are not fatal.
 */
static void setup_env_path(void)
{
	const char *current = getenv("PATH");
	char *combined;
	int needed;

	if ((current == NULL) || (*current == '\0')) {
		if (putenv("PATH=:/bin:/usr/bin/" UML_LIB_PATH) != 0)
			perror("couldn't putenv");
		return;
	}

	/* room for "PATH=", the old value, the suffix and the NUL */
	needed = strlen(current) + strlen("PATH=" UML_LIB_PATH) + 1;
	combined = malloc(needed);
	if (combined == NULL) {
		perror("couldn't malloc to set a new PATH");
		return;
	}

	snprintf(combined, needed, "PATH=%s" UML_LIB_PATH, current);

	/* on success the buffer is handed to putenv() and not freed here */
	if (putenv(combined) == 0)
		return;

	perror("couldn't putenv to set a new PATH");
	free(combined);
}
{ + perror("Mallocing an arg"); + exit(1); + } + } + new_argv[argc] = NULL; + + /* + * Allow these signals to bring down a UML if all other + * methods of control fail. + */ + install_fatal_handler(SIGINT); + install_fatal_handler(SIGTERM); + install_fatal_handler(SIGHUP); + + scan_elf_aux(envp); + + do_uml_initcalls(); + ret = linux_main(argc, argv); + + /* + * Disable SIGPROF - I have no idea why libc doesn't do this or turn + * off the profiling time, but UML dies with a SIGPROF just before + * exiting when profiling is active. + */ + change_sig(SIGPROF, 0); + + /* + * This signal stuff used to be in the reboot case. However, + * sometimes a SIGVTALRM can come in when we're halting (reproducably + * when writing out gcov information, presumably because that takes + * some time) and cause a segfault. + */ + + /* stop timers and set SIGVTALRM to be ignored */ + disable_timer(); + + /* disable SIGIO for the fds and set SIGIO to be ignored */ + err = deactivate_all_fds(); + if (err) + printf("deactivate_all_fds failed, errno = %d\n", -err); + + /* + * Let any pending signals fire now. This ensures + * that they won't be delivered after the exec, when + * they are definitely not expected. + */ + unblock_signals(); + + /* Reboot */ + if (ret) { + printf("\n"); + execvp(new_argv[0], new_argv); + perror("Failed to exec kernel"); + ret = 1; + } + printf("\n"); + return uml_exitcode; +} + +extern void *__real_malloc(int); + +void *__wrap_malloc(int size) +{ + void *ret; + + if (!kmalloc_ok) + return __real_malloc(size); + else if (size <= UM_KERN_PAGE_SIZE) + /* finding contiguous pages can be hard*/ + ret = uml_kmalloc(size, UM_GFP_KERNEL); + else ret = vmalloc(size); + + /* + * glibc people insist that if malloc fails, errno should be + * set by malloc as well. So we do. 
/*
 * calloc() replacement built on __wrap_malloc(): allocate room for n
 * objects of size bytes each and zero it.
 *
 * Returns the zeroed memory, or NULL if the allocation fails or if
 * n * size cannot be represented as a non-negative int (errno is set to
 * ENOMEM in the latter case).
 */
void *__wrap_calloc(int n, int size)
{
	unsigned long long total;
	void *ptr;

	/*
	 * __wrap_malloc() takes an int.  Computing n * size in int could
	 * overflow, which would allocate less than requested and then
	 * memset the wrong length, so do the multiplication in a wider
	 * type and reject anything that does not fit.
	 * (~0U >> 1) is INT_MAX, spelled without <limits.h>.
	 */
	if ((n < 0) || (size < 0))
		goto nomem;
	total = (unsigned long long) n * (unsigned long long) size;
	if (total > (~0U >> 1))
		goto nomem;

	ptr = __wrap_malloc((int) total);
	if (ptr == NULL)
		return NULL;
	memset(ptr, 0, (size_t) total);
	return ptr;

nomem:
	errno = ENOMEM;
	return NULL;
}
+ */ + + if ((addr >= uml_physmem) && (addr < high_physmem)) { + if (kmalloc_ok) + kfree(ptr); + } + else if ((addr >= start_vm) && (addr < end_vm)) { + if (kmalloc_ok) + vfree(ptr); + } + else __real_free(ptr); +} diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c new file mode 100644 index 0000000..93a11d7 --- /dev/null +++ b/arch/um/os-Linux/mem.c @@ -0,0 +1,278 @@ +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/param.h> +#include "init.h" +#include "kern_constants.h" +#include "os.h" +#include "user.h" + +/* Modified by which_tmpdir, which is called during early boot */ +static char *default_tmpdir = "/tmp"; + +/* + * Modified when creating the physical memory file and when checking + * the tmp filesystem for usability, both happening during early boot. + */ +static char *tempdir = NULL; + +static void __init find_tempdir(void) +{ + const char *dirs[] = { "TMP", "TEMP", "TMPDIR", NULL }; + int i; + char *dir = NULL; + + if (tempdir != NULL) + /* We've already been called */ + return; + for (i = 0; dirs[i]; i++) { + dir = getenv(dirs[i]); + if ((dir != NULL) && (*dir != '\0')) + break; + } + if ((dir == NULL) || (*dir == '\0')) + dir = default_tmpdir; + + tempdir = malloc(strlen(dir) + 2); + if (tempdir == NULL) { + fprintf(stderr, "Failed to malloc tempdir, " + "errno = %d\n", errno); + return; + } + strcpy(tempdir, dir); + strcat(tempdir, "/"); +} + +/* + * This will return 1, with the first character in buf being the + * character following the next instance of c in the file. This will + * read the file as needed. If there's an error, -errno is returned; + * if the end of the file is reached, 0 is returned. 
/*
 * Advance buf to just past the next occurrence of c in the stream read
 * from fd.
 *
 * buf must hold a NUL-terminated string on entry (possibly empty) and
 * has room for size bytes.  While c is not present in buf, buf is
 * overwritten with the next chunk read from fd.  Once c is found, the
 * text following it is moved to the front of buf and buf is topped up
 * from fd so that a string straddling two reads can still be matched.
 *
 * Returns 1 when c was found, 0 if end of file was reached first, and
 * -errno if read() failed.
 */
static int next(int fd, char *buf, size_t size, char c)
{
	char *hit;
	size_t tail;
	ssize_t got;

	for (;;) {
		hit = strchr(buf, c);
		if (hit != NULL)
			break;

		got = read(fd, buf, size - 1);
		if (got < 0)
			return -errno;
		if (got == 0)
			return 0;

		buf[got] = '\0';
	}

	/* keep what follows c, including its terminating NUL */
	hit += 1;
	tail = strlen(hit);
	memmove(buf, hit, tail + 1);

	/* refill behind the kept text; hitting EOF here is not an error */
	got = read(fd, buf + tail, size - tail - 1);
	if (got < 0)
		return -errno;

	buf[tail + got] = '\0';
	return 1;
}
+ */ +static void which_tmpdir(void) +{ + int fd, found; + char buf[128] = { '\0' }; + + if (checked_tmpdir) + return; + + checked_tmpdir = 1; + + printf("Checking for tmpfs mount on /dev/shm..."); + + fd = open("/proc/mounts", O_RDONLY); + if (fd < 0) { + printf("failed to open /proc/mounts, errno = %d\n", errno); + return; + } + + while (1) { + found = next(fd, buf, ARRAY_SIZE(buf), ' '); + if (found != 1) + break; + + if (!strncmp(buf, "/dev/shm", strlen("/dev/shm"))) + goto found; + + found = next(fd, buf, ARRAY_SIZE(buf), '\n'); + if (found != 1) + break; + } + +err: + if (found == 0) + printf("nothing mounted on /dev/shm\n"); + else if (found < 0) + printf("read returned errno %d\n", -found); + +out: + close(fd); + + return; + +found: + found = next(fd, buf, ARRAY_SIZE(buf), ' '); + if (found != 1) + goto err; + + if (strncmp(buf, "tmpfs", strlen("tmpfs"))) { + printf("not tmpfs\n"); + goto out; + } + + printf("OK\n"); + default_tmpdir = "/dev/shm"; + goto out; +} + +static int __init make_tempfile(const char *template, char **out_tempname, + int do_unlink) +{ + char *tempname; + int fd; + + which_tmpdir(); + tempname = malloc(MAXPATHLEN); + if (tempname == NULL) + return -1; + + find_tempdir(); + if ((tempdir == NULL) || (strlen(tempdir) >= MAXPATHLEN)) + return -1; + + if (template[0] != '/') + strcpy(tempname, tempdir); + else + tempname[0] = '\0'; + strncat(tempname, template, MAXPATHLEN-1-strlen(tempname)); + fd = mkstemp(tempname); + if (fd < 0) { + fprintf(stderr, "open - cannot create %s: %s\n", tempname, + strerror(errno)); + goto out; + } + if (do_unlink && (unlink(tempname) < 0)) { + perror("unlink"); + goto out; + } + if (out_tempname) { + *out_tempname = tempname; + } else + free(tempname); + return fd; +out: + free(tempname); + return -1; +} + +#define TEMPNAME_TEMPLATE "vm_file-XXXXXX" + +static int __init create_tmp_file(unsigned long long len) +{ + int fd, err; + char zero; + + fd = make_tempfile(TEMPNAME_TEMPLATE, NULL, 1); + if (fd < 0) + 
exit(1); + + err = fchmod(fd, 0777); + if (err < 0) { + perror("fchmod"); + exit(1); + } + + /* + * Seek to len - 1 because writing a character there will + * increase the file size by one byte, to the desired length. + */ + if (lseek64(fd, len - 1, SEEK_SET) < 0) { + perror("lseek64"); + exit(1); + } + + zero = 0; + + err = write(fd, &zero, 1); + if (err != 1) { + perror("write"); + exit(1); + } + + return fd; +} + +int __init create_mem_file(unsigned long long len) +{ + int err, fd; + + fd = create_tmp_file(len); + + err = os_set_exec_close(fd); + if (err < 0) { + errno = -err; + perror("exec_close"); + } + return fd; +} + + +void __init check_tmpexec(void) +{ + void *addr; + int err, fd = create_tmp_file(UM_KERN_PAGE_SIZE); + + addr = mmap(NULL, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd, 0); + printf("Checking PROT_EXEC mmap in %s...",tempdir); + fflush(stdout); + if (addr == MAP_FAILED) { + err = errno; + perror("failed"); + close(fd); + if (err == EPERM) + printf("%s must be not mounted noexec\n",tempdir); + exit(1); + } + printf("OK\n"); + munmap(addr, UM_KERN_PAGE_SIZE); + + close(fd); +} diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c new file mode 100644 index 0000000..e0477c3 --- /dev/null +++ b/arch/um/os-Linux/process.c @@ -0,0 +1,269 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <signal.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/ptrace.h> +#include <sys/wait.h> +#include <asm/unistd.h> +#include "init.h" +#include "kern_constants.h" +#include "longjmp.h" +#include "os.h" +#include "process.h" +#include "skas_ptrace.h" +#include "user.h" + +#define ARBITRARY_ADDR -1 +#define FAILURE_PID -1 + +#define STAT_PATH_LEN sizeof("/proc/#######/stat\0") +#define COMM_SCANF "%*[^)])" + +unsigned long os_process_pc(int pid) +{ + char 
proc_stat[STAT_PATH_LEN], buf[256]; + unsigned long pc = ARBITRARY_ADDR; + int fd, err; + + sprintf(proc_stat, "/proc/%d/stat", pid); + fd = open(proc_stat, O_RDONLY, 0); + if (fd < 0) { + printk(UM_KERN_ERR "os_process_pc - couldn't open '%s', " + "errno = %d\n", proc_stat, errno); + goto out; + } + CATCH_EINTR(err = read(fd, buf, sizeof(buf))); + if (err < 0) { + printk(UM_KERN_ERR "os_process_pc - couldn't read '%s', " + "err = %d\n", proc_stat, errno); + goto out_close; + } + os_close_file(fd); + pc = ARBITRARY_ADDR; + if (sscanf(buf, "%*d " COMM_SCANF " %*c %*d %*d %*d %*d %*d %*d %*d " + "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d " + "%*d %*d %*d %*d %*d %lu", &pc) != 1) + printk(UM_KERN_ERR "os_process_pc - couldn't find pc in '%s'\n", + buf); + out_close: + close(fd); + out: + return pc; +} + +int os_process_parent(int pid) +{ + char stat[STAT_PATH_LEN]; + char data[256]; + int parent = FAILURE_PID, n, fd; + + if (pid == -1) + return parent; + + snprintf(stat, sizeof(stat), "/proc/%d/stat", pid); + fd = open(stat, O_RDONLY, 0); + if (fd < 0) { + printk(UM_KERN_ERR "Couldn't open '%s', errno = %d\n", stat, + errno); + return parent; + } + + CATCH_EINTR(n = read(fd, data, sizeof(data))); + close(fd); + + if (n < 0) { + printk(UM_KERN_ERR "Couldn't read '%s', errno = %d\n", stat, + errno); + return parent; + } + + parent = FAILURE_PID; + n = sscanf(data, "%*d " COMM_SCANF " %*c %d", &parent); + if (n != 1) + printk(UM_KERN_ERR "Failed to scan '%s'\n", data); + + return parent; +} + +void os_stop_process(int pid) +{ + kill(pid, SIGSTOP); +} + +void os_kill_process(int pid, int reap_child) +{ + kill(pid, SIGKILL); + if (reap_child) + CATCH_EINTR(waitpid(pid, NULL, __WALL)); +} + +/* This is here uniquely to have access to the userspace errno, i.e. the one + * used by ptrace in case of error. 
+ */ + +long os_ptrace_ldt(long pid, long addr, long data) +{ + int ret; + + ret = ptrace(PTRACE_LDT, pid, addr, data); + + if (ret < 0) + return -errno; + return ret; +} + +/* Kill off a ptraced child by all means available. kill it normally first, + * then PTRACE_KILL it, then PTRACE_CONT it in case it's in a run state from + * which it can't exit directly. + */ + +void os_kill_ptraced_process(int pid, int reap_child) +{ + kill(pid, SIGKILL); + ptrace(PTRACE_KILL, pid); + ptrace(PTRACE_CONT, pid); + if (reap_child) + CATCH_EINTR(waitpid(pid, NULL, __WALL)); +} + +/* Don't use the glibc version, which caches the result in TLS. It misses some + * syscalls, and also breaks with clone(), which does not unshare the TLS. + */ + +int os_getpid(void) +{ + return syscall(__NR_getpid); +} + +int os_getpgrp(void) +{ + return getpgrp(); +} + +int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, + int r, int w, int x) +{ + void *loc; + int prot; + + prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? PROT_EXEC : 0); + + loc = mmap64((void *) virt, len, prot, MAP_SHARED | MAP_FIXED, + fd, off); + if (loc == MAP_FAILED) + return -errno; + return 0; +} + +int os_protect_memory(void *addr, unsigned long len, int r, int w, int x) +{ + int prot = ((r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? 
/*
 * Unmap len bytes starting at addr.
 *
 * Returns 0 on success or -errno if munmap() fails.
 */
int os_unmap_memory(void *addr, int len)
{
	return (munmap(addr, len) < 0) ? -errno : 0;
}
/*
 * Call fn(arg) with a jump buffer set up around it.
 *
 * fn      - function to run
 * arg     - argument passed to fn
 * jmp_ptr - receives the address of the jump buffer, which lives on this
 *           function's stack and is therefore only usable while this
 *           call is still active
 *
 * Returns 0 after fn returns normally (fn's own return value is
 * discarded), or the non-zero value produced by UML_SETJMP.
 * NOTE(review): presumably that non-zero value arrives via a matching
 * longjmp on *jmp_ptr made while fn is running - confirm against the
 * UML_SETJMP definition.
 */
int run_kernel_thread(int (*fn)(void *), void *arg, jmp_buf **jmp_ptr)
{
	jmp_buf buf;
	int n;

	/* publish the buffer before arming it so fn's side can find it */
	*jmp_ptr = &buf;
	n = UML_SETJMP(&buf);
	if (n != 0)
		return n;
	(*fn)(arg);
	return 0;
}
"user.h" + +/* + * Protected by sigio_lock(), also used by sigio_cleanup, which is an + * exitcall. + */ +static int write_sigio_pid = -1; +static unsigned long write_sigio_stack; + +/* + * These arrays are initialized before the sigio thread is started, and + * the descriptors closed after it is killed. So, it can't see them change. + * On the UML side, they are changed under the sigio_lock. + */ +#define SIGIO_FDS_INIT {-1, -1} + +static int write_sigio_fds[2] = SIGIO_FDS_INIT; +static int sigio_private[2] = SIGIO_FDS_INIT; + +struct pollfds { + struct pollfd *poll; + int size; + int used; +}; + +/* + * Protected by sigio_lock(). Used by the sigio thread, but the UML thread + * synchronizes with it. + */ +static struct pollfds current_poll; +static struct pollfds next_poll; +static struct pollfds all_sigio_fds; + +static int write_sigio_thread(void *unused) +{ + struct pollfds *fds, tmp; + struct pollfd *p; + int i, n, respond_fd; + char c; + + signal(SIGWINCH, SIG_IGN); + fds = ¤t_poll; + while (1) { + n = poll(fds->poll, fds->used, -1); + if (n < 0) { + if (errno == EINTR) + continue; + printk(UM_KERN_ERR "write_sigio_thread : poll returned " + "%d, errno = %d\n", n, errno); + } + for (i = 0; i < fds->used; i++) { + p = &fds->poll[i]; + if (p->revents == 0) + continue; + if (p->fd == sigio_private[1]) { + CATCH_EINTR(n = read(sigio_private[1], &c, + sizeof(c))); + if (n != sizeof(c)) + printk(UM_KERN_ERR + "write_sigio_thread : " + "read on socket failed, " + "err = %d\n", errno); + tmp = current_poll; + current_poll = next_poll; + next_poll = tmp; + respond_fd = sigio_private[1]; + } + else { + respond_fd = write_sigio_fds[1]; + fds->used--; + memmove(&fds->poll[i], &fds->poll[i + 1], + (fds->used - i) * sizeof(*fds->poll)); + } + + CATCH_EINTR(n = write(respond_fd, &c, sizeof(c))); + if (n != sizeof(c)) + printk(UM_KERN_ERR "write_sigio_thread : " + "write on socket failed, err = %d\n", + errno); + } + } + + return 0; +} + +static int need_poll(struct pollfds 
*polls, int n) +{ + struct pollfd *new; + + if (n <= polls->size) + return 0; + + new = uml_kmalloc(n * sizeof(struct pollfd), UM_GFP_ATOMIC); + if (new == NULL) { + printk(UM_KERN_ERR "need_poll : failed to allocate new " + "pollfds\n"); + return -ENOMEM; + } + + memcpy(new, polls->poll, polls->used * sizeof(struct pollfd)); + kfree(polls->poll); + + polls->poll = new; + polls->size = n; + return 0; +} + +/* + * Must be called with sigio_lock held, because it's needed by the marked + * critical section. + */ +static void update_thread(void) +{ + unsigned long flags; + int n; + char c; + + flags = set_signals(0); + CATCH_EINTR(n = write(sigio_private[0], &c, sizeof(c))); + if (n != sizeof(c)) { + printk(UM_KERN_ERR "update_thread : write failed, err = %d\n", + errno); + goto fail; + } + + CATCH_EINTR(n = read(sigio_private[0], &c, sizeof(c))); + if (n != sizeof(c)) { + printk(UM_KERN_ERR "update_thread : read failed, err = %d\n", + errno); + goto fail; + } + + set_signals(flags); + return; + fail: + /* Critical section start */ + if (write_sigio_pid != -1) { + os_kill_process(write_sigio_pid, 1); + free_stack(write_sigio_stack, 0); + } + write_sigio_pid = -1; + close(sigio_private[0]); + close(sigio_private[1]); + close(write_sigio_fds[0]); + close(write_sigio_fds[1]); + /* Critical section end */ + set_signals(flags); +} + +int add_sigio_fd(int fd) +{ + struct pollfd *p; + int err = 0, i, n; + + sigio_lock(); + for (i = 0; i < all_sigio_fds.used; i++) { + if (all_sigio_fds.poll[i].fd == fd) + break; + } + if (i == all_sigio_fds.used) + goto out; + + p = &all_sigio_fds.poll[i]; + + for (i = 0; i < current_poll.used; i++) { + if (current_poll.poll[i].fd == fd) + goto out; + } + + n = current_poll.used; + err = need_poll(&next_poll, n + 1); + if (err) + goto out; + + memcpy(next_poll.poll, current_poll.poll, + current_poll.used * sizeof(struct pollfd)); + next_poll.poll[n] = *p; + next_poll.used = n + 1; + update_thread(); + out: + sigio_unlock(); + return err; +} + 
+int ignore_sigio_fd(int fd) +{ + struct pollfd *p; + int err = 0, i, n = 0; + + /* + * This is called from exitcalls elsewhere in UML - if + * sigio_cleanup has already run, then update_thread will hang + * or fail because the thread is no longer running. + */ + if (write_sigio_pid == -1) + return -EIO; + + sigio_lock(); + for (i = 0; i < current_poll.used; i++) { + if (current_poll.poll[i].fd == fd) + break; + } + if (i == current_poll.used) + goto out; + + err = need_poll(&next_poll, current_poll.used - 1); + if (err) + goto out; + + for (i = 0; i < current_poll.used; i++) { + p = &current_poll.poll[i]; + if (p->fd != fd) + next_poll.poll[n++] = *p; + } + next_poll.used = current_poll.used - 1; + + update_thread(); + out: + sigio_unlock(); + return err; +} + +static struct pollfd *setup_initial_poll(int fd) +{ + struct pollfd *p; + + p = uml_kmalloc(sizeof(struct pollfd), UM_GFP_KERNEL); + if (p == NULL) { + printk(UM_KERN_ERR "setup_initial_poll : failed to allocate " + "poll\n"); + return NULL; + } + *p = ((struct pollfd) { .fd = fd, + .events = POLLIN, + .revents = 0 }); + return p; +} + +static void write_sigio_workaround(void) +{ + struct pollfd *p; + int err; + int l_write_sigio_fds[2]; + int l_sigio_private[2]; + int l_write_sigio_pid; + + /* We call this *tons* of times - and most ones we must just fail. */ + sigio_lock(); + l_write_sigio_pid = write_sigio_pid; + sigio_unlock(); + + if (l_write_sigio_pid != -1) + return; + + err = os_pipe(l_write_sigio_fds, 1, 1); + if (err < 0) { + printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 1 failed, " + "err = %d\n", -err); + return; + } + err = os_pipe(l_sigio_private, 1, 1); + if (err < 0) { + printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 2 failed, " + "err = %d\n", -err); + goto out_close1; + } + + p = setup_initial_poll(l_sigio_private[1]); + if (!p) + goto out_close2; + + sigio_lock(); + + /* + * Did we race? 
Don't try to optimize this, please, it's not so likely + * to happen, and no more than once at the boot. + */ + if (write_sigio_pid != -1) + goto out_free; + + current_poll = ((struct pollfds) { .poll = p, + .used = 1, + .size = 1 }); + + if (write_sigio_irq(l_write_sigio_fds[0])) + goto out_clear_poll; + + memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds)); + memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private)); + + write_sigio_pid = run_helper_thread(write_sigio_thread, NULL, + CLONE_FILES | CLONE_VM, + &write_sigio_stack); + + if (write_sigio_pid < 0) + goto out_clear; + + sigio_unlock(); + return; + +out_clear: + write_sigio_pid = -1; + write_sigio_fds[0] = -1; + write_sigio_fds[1] = -1; + sigio_private[0] = -1; + sigio_private[1] = -1; +out_clear_poll: + current_poll = ((struct pollfds) { .poll = NULL, + .size = 0, + .used = 0 }); +out_free: + sigio_unlock(); + kfree(p); +out_close2: + close(l_sigio_private[0]); + close(l_sigio_private[1]); +out_close1: + close(l_write_sigio_fds[0]); + close(l_write_sigio_fds[1]); +} + +void sigio_broken(int fd, int read) +{ + int err; + + write_sigio_workaround(); + + sigio_lock(); + err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1); + if (err) { + printk(UM_KERN_ERR "maybe_sigio_broken - failed to add pollfd " + "for descriptor %d\n", fd); + goto out; + } + + all_sigio_fds.poll[all_sigio_fds.used++] = + ((struct pollfd) { .fd = fd, + .events = read ? 
POLLIN : POLLOUT, + .revents = 0 }); +out: + sigio_unlock(); +} + +/* Changed during early boot */ +static int pty_output_sigio; +static int pty_close_sigio; + +void maybe_sigio_broken(int fd, int read) +{ + if (!isatty(fd)) + return; + + if ((read || pty_output_sigio) && (!read || pty_close_sigio)) + return; + + sigio_broken(fd, read); +} + +static void sigio_cleanup(void) +{ + if (write_sigio_pid == -1) + return; + + os_kill_process(write_sigio_pid, 1); + free_stack(write_sigio_stack, 0); + write_sigio_pid = -1; +} + +__uml_exitcall(sigio_cleanup); + +/* Used as a flag during SIGIO testing early in boot */ +static int got_sigio; + +static void __init handler(int sig) +{ + got_sigio = 1; +} + +struct openpty_arg { + int master; + int slave; + int err; +}; + +static void openpty_cb(void *arg) +{ + struct openpty_arg *info = arg; + + info->err = 0; + if (openpty(&info->master, &info->slave, NULL, NULL, NULL)) + info->err = -errno; +} + +static int async_pty(int master, int slave) +{ + int flags; + + flags = fcntl(master, F_GETFL); + if (flags < 0) + return -errno; + + if ((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) || + (fcntl(master, F_SETOWN, os_getpid()) < 0)) + return -errno; + + if ((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)) + return -errno; + + return 0; +} + +static void __init check_one_sigio(void (*proc)(int, int)) +{ + struct sigaction old, new; + struct openpty_arg pty = { .master = -1, .slave = -1 }; + int master, slave, err; + + initial_thread_cb(openpty_cb, &pty); + if (pty.err) { + printk(UM_KERN_ERR "check_one_sigio failed, errno = %d\n", + -pty.err); + return; + } + + master = pty.master; + slave = pty.slave; + + if ((master == -1) || (slave == -1)) { + printk(UM_KERN_ERR "check_one_sigio failed to allocate a " + "pty\n"); + return; + } + + /* Not now, but complain so we now where we failed. 
*/ + err = raw(master); + if (err < 0) { + printk(UM_KERN_ERR "check_one_sigio : raw failed, errno = %d\n", + -err); + return; + } + + err = async_pty(master, slave); + if (err < 0) { + printk(UM_KERN_ERR "check_one_sigio : sigio_async failed, " + "err = %d\n", -err); + return; + } + + if (sigaction(SIGIO, NULL, &old) < 0) { + printk(UM_KERN_ERR "check_one_sigio : sigaction 1 failed, " + "errno = %d\n", errno); + return; + } + + new = old; + new.sa_handler = handler; + if (sigaction(SIGIO, &new, NULL) < 0) { + printk(UM_KERN_ERR "check_one_sigio : sigaction 2 failed, " + "errno = %d\n", errno); + return; + } + + got_sigio = 0; + (*proc)(master, slave); + + close(master); + close(slave); + + if (sigaction(SIGIO, &old, NULL) < 0) + printk(UM_KERN_ERR "check_one_sigio : sigaction 3 failed, " + "errno = %d\n", errno); +} + +static void tty_output(int master, int slave) +{ + int n; + char buf[512]; + + printk(UM_KERN_INFO "Checking that host ptys support output SIGIO..."); + + memset(buf, 0, sizeof(buf)); + + while (write(master, buf, sizeof(buf)) > 0) ; + if (errno != EAGAIN) + printk(UM_KERN_ERR "tty_output : write failed, errno = %d\n", + errno); + while (((n = read(slave, buf, sizeof(buf))) > 0) && + !({ barrier(); got_sigio; })) + ; + + if (got_sigio) { + printk(UM_KERN_CONT "Yes\n"); + pty_output_sigio = 1; + } else if (n == -EAGAIN) + printk(UM_KERN_CONT "No, enabling workaround\n"); + else + printk(UM_KERN_CONT "tty_output : read failed, err = %d\n", n); +} + +static void tty_close(int master, int slave) +{ + printk(UM_KERN_INFO "Checking that host ptys support SIGIO on " + "close..."); + + close(slave); + if (got_sigio) { + printk(UM_KERN_CONT "Yes\n"); + pty_close_sigio = 1; + } else + printk(UM_KERN_CONT "No, enabling workaround\n"); +} + +static void __init check_sigio(void) +{ + if ((access("/dev/ptmx", R_OK) < 0) && + (access("/dev/ptyp0", R_OK) < 0)) { + printk(UM_KERN_WARNING "No pseudo-terminals available - " + "skipping pty SIGIO check\n"); + return; + 
} + check_one_sigio(tty_output); + check_one_sigio(tty_close); +} + +/* Here because it only does the SIGIO testing for now */ +void __init os_check_bugs(void) +{ + check_sigio(); +} diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c new file mode 100644 index 0000000..6ae1807 --- /dev/null +++ b/arch/um/os-Linux/signal.c @@ -0,0 +1,298 @@ +/* + * Copyright (C) 2004 PathScale, Inc + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdlib.h> +#include <stdarg.h> +#include <errno.h> +#include <signal.h> +#include <strings.h> +#include "as-layout.h" +#include "kern_util.h" +#include "os.h" +#include "process.h" +#include "sysdep/barrier.h" +#include "sysdep/sigcontext.h" +#include "user.h" + +/* Copied from linux/compiler-gcc.h since we can't include it directly */ +#define barrier() __asm__ __volatile__("": : :"memory") + +void (*sig_info[NSIG])(int, struct uml_pt_regs *) = { + [SIGTRAP] = relay_signal, + [SIGFPE] = relay_signal, + [SIGILL] = relay_signal, + [SIGWINCH] = winch, + [SIGBUS] = bus_handler, + [SIGSEGV] = segv_handler, + [SIGIO] = sigio_handler, + [SIGVTALRM] = timer_handler }; + +static void sig_handler_common(int sig, struct sigcontext *sc) +{ + struct uml_pt_regs r; + int save_errno = errno; + + r.is_user = 0; + if (sig == SIGSEGV) { + /* For segfaults, we want the data from the sigcontext. */ + copy_sc(&r, sc); + GET_FAULTINFO_FROM_SC(r.faultinfo, sc); + } + + /* enable signals if sig isn't IRQ signal */ + if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGVTALRM)) + unblock_signals(); + + (*sig_info[sig])(sig, &r); + + errno = save_errno; +} + +/* + * These are the asynchronous signals. SIGPROF is excluded because we want to + * be able to profile all of UML, not just the non-critical sections. If + * profiling is not thread-safe, then that is not my problem. We can disable + * profiling when SMP is enabled in that case. 
+ */ +#define SIGIO_BIT 0 +#define SIGIO_MASK (1 << SIGIO_BIT) + +#define SIGVTALRM_BIT 1 +#define SIGVTALRM_MASK (1 << SIGVTALRM_BIT) + +static int signals_enabled; +static unsigned int signals_pending; + +void sig_handler(int sig, struct sigcontext *sc) +{ + int enabled; + + enabled = signals_enabled; + if (!enabled && (sig == SIGIO)) { + signals_pending |= SIGIO_MASK; + return; + } + + block_signals(); + + sig_handler_common(sig, sc); + + set_signals(enabled); +} + +static void real_alarm_handler(struct sigcontext *sc) +{ + struct uml_pt_regs regs; + + if (sc != NULL) + copy_sc(&regs, sc); + regs.is_user = 0; + unblock_signals(); + timer_handler(SIGVTALRM, &regs); +} + +void alarm_handler(int sig, struct sigcontext *sc) +{ + int enabled; + + enabled = signals_enabled; + if (!signals_enabled) { + signals_pending |= SIGVTALRM_MASK; + return; + } + + block_signals(); + + real_alarm_handler(sc); + set_signals(enabled); +} + +void timer_init(void) +{ + set_handler(SIGVTALRM, (__sighandler_t) alarm_handler, + SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, -1); +} + +void set_sigstack(void *sig_stack, int size) +{ + stack_t stack = ((stack_t) { .ss_flags = 0, + .ss_sp = (__ptr_t) sig_stack, + .ss_size = size - sizeof(void *) }); + + if (sigaltstack(&stack, NULL) != 0) + panic("enabling signal stack failed, errno = %d\n", errno); +} + +static void (*handlers[_NSIG])(int sig, struct sigcontext *sc); + +void handle_signal(int sig, struct sigcontext *sc) +{ + unsigned long pending = 1UL << sig; + + do { + int nested, bail; + + /* + * pending comes back with one bit set for each + * interrupt that arrived while setting up the stack, + * plus a bit for this interrupt, plus the zero bit is + * set if this is a nested interrupt. + * If bail is true, then we interrupted another + * handler setting up the stack. In this case, we + * have to return, and the upper handler will deal + * with this interrupt. 
+ */ + bail = to_irq_stack(&pending); + if (bail) + return; + + nested = pending & 1; + pending &= ~1; + + while ((sig = ffs(pending)) != 0){ + sig--; + pending &= ~(1 << sig); + (*handlers[sig])(sig, sc); + } + + /* + * Again, pending comes back with a mask of signals + * that arrived while tearing down the stack. If this + * is non-zero, we just go back, set up the stack + * again, and handle the new interrupts. + */ + if (!nested) + pending = from_irq_stack(nested); + } while (pending); +} + +extern void hard_handler(int sig); + +void set_handler(int sig, void (*handler)(int), int flags, ...) +{ + struct sigaction action; + va_list ap; + sigset_t sig_mask; + int mask; + + handlers[sig] = (void (*)(int, struct sigcontext *)) handler; + action.sa_handler = hard_handler; + + sigemptyset(&action.sa_mask); + + va_start(ap, flags); + while ((mask = va_arg(ap, int)) != -1) + sigaddset(&action.sa_mask, mask); + va_end(ap); + + if (sig == SIGSEGV) + flags |= SA_NODEFER; + + action.sa_flags = flags; + action.sa_restorer = NULL; + if (sigaction(sig, &action, NULL) < 0) + panic("sigaction failed - errno = %d\n", errno); + + sigemptyset(&sig_mask); + sigaddset(&sig_mask, sig); + if (sigprocmask(SIG_UNBLOCK, &sig_mask, NULL) < 0) + panic("sigprocmask failed - errno = %d\n", errno); +} + +int change_sig(int signal, int on) +{ + sigset_t sigset; + + sigemptyset(&sigset); + sigaddset(&sigset, signal); + if (sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, NULL) < 0) + return -errno; + + return 0; +} + +void block_signals(void) +{ + signals_enabled = 0; + /* + * This must return with signals disabled, so this barrier + * ensures that writes are flushed out before the return. + * This might matter if gcc figures out how to inline this and + * decides to shuffle this code into the caller. + */ + barrier(); +} + +void unblock_signals(void) +{ + int save_pending; + + if (signals_enabled == 1) + return; + + /* + * We loop because the IRQ handler returns with interrupts off. 
So, + * interrupts may have arrived and we need to re-enable them and + * recheck signals_pending. + */ + while (1) { + /* + * Save and reset save_pending after enabling signals. This + * way, signals_pending won't be changed while we're reading it. + */ + signals_enabled = 1; + + /* + * Setting signals_enabled and reading signals_pending must + * happen in this order. + */ + barrier(); + + save_pending = signals_pending; + if (save_pending == 0) + return; + + signals_pending = 0; + + /* + * We have pending interrupts, so disable signals, as the + * handlers expect them off when they are called. They will + * be enabled again above. + */ + + signals_enabled = 0; + + /* + * Deal with SIGIO first because the alarm handler might + * schedule, leaving the pending SIGIO stranded until we come + * back here. + */ + if (save_pending & SIGIO_MASK) + sig_handler_common(SIGIO, NULL); + + if (save_pending & SIGVTALRM_MASK) + real_alarm_handler(NULL); + } +} + +int get_signals(void) +{ + return signals_enabled; +} + +int set_signals(int enable) +{ + int ret; + if (signals_enabled == enable) + return enable; + + ret = signals_enabled; + if (enable) + unblock_signals(); + else block_signals(); + + return ret; +} diff --git a/arch/um/os-Linux/skas/Makefile b/arch/um/os-Linux/skas/Makefile new file mode 100644 index 0000000..d2ea340 --- /dev/null +++ b/arch/um/os-Linux/skas/Makefile @@ -0,0 +1,10 @@ +# +# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) +# Licensed under the GPL +# + +obj-y := mem.o process.o + +USER_OBJS := $(obj-y) + +include arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c new file mode 100644 index 0000000..d261f17 --- /dev/null +++ b/arch/um/os-Linux/skas/mem.c @@ -0,0 +1,295 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stddef.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include 
<sys/mman.h> +#include "init.h" +#include "kern_constants.h" +#include "as-layout.h" +#include "mm_id.h" +#include "os.h" +#include "proc_mm.h" +#include "ptrace_user.h" +#include "registers.h" +#include "skas.h" +#include "user.h" +#include "sysdep/ptrace.h" +#include "sysdep/stub.h" + +extern unsigned long batch_syscall_stub, __syscall_stub_start; + +extern void wait_stub_done(int pid); + +static inline unsigned long *check_init_stack(struct mm_id * mm_idp, + unsigned long *stack) +{ + if (stack == NULL) { + stack = (unsigned long *) mm_idp->stack + 2; + *stack = 0; + } + return stack; +} + +static unsigned long syscall_regs[MAX_REG_NR]; + +static int __init init_syscall_regs(void) +{ + get_safe_registers(syscall_regs); + syscall_regs[REGS_IP_INDEX] = STUB_CODE + + ((unsigned long) &batch_syscall_stub - + (unsigned long) &__syscall_stub_start); + return 0; +} + +__initcall(init_syscall_regs); + +extern int proc_mm; + +int single_count = 0; +int multi_count = 0; +int multi_op_count = 0; + +static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr) +{ + int n, i; + long ret, offset; + unsigned long * data; + unsigned long * syscall; + int err, pid = mm_idp->u.pid; + + if (proc_mm) + /* FIXME: Need to look up userspace_pid by cpu */ + pid = userspace_pid[0]; + + multi_count++; + + n = ptrace_setregs(pid, syscall_regs); + if (n < 0) { + printk(UM_KERN_ERR "Registers - \n"); + for (i = 0; i < MAX_REG_NR; i++) + printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]); + panic("do_syscall_stub : PTRACE_SETREGS failed, errno = %d\n", + -n); + } + + err = ptrace(PTRACE_CONT, pid, 0, 0); + if (err) + panic("Failed to continue stub, pid = %d, errno = %d\n", pid, + errno); + + wait_stub_done(pid); + + /* + * When the stub stops, we find the following values on the + * beginning of the stack: + * (long )return_value + * (long )offset to failed sycall-data (0, if no error) + */ + ret = *((unsigned long *) mm_idp->stack); + offset = *((unsigned long *) mm_idp->stack 
+ 1); + if (offset) { + data = (unsigned long *)(mm_idp->stack + offset - STUB_DATA); + printk(UM_KERN_ERR "do_syscall_stub : ret = %ld, offset = %ld, " + "data = %p\n", ret, offset, data); + syscall = (unsigned long *)((unsigned long)data + data[0]); + printk(UM_KERN_ERR "do_syscall_stub: syscall %ld failed, " + "return value = 0x%lx, expected return value = 0x%lx\n", + syscall[0], ret, syscall[7]); + printk(UM_KERN_ERR " syscall parameters: " + "0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", + syscall[1], syscall[2], syscall[3], + syscall[4], syscall[5], syscall[6]); + for (n = 1; n < data[0]/sizeof(long); n++) { + if (n == 1) + printk(UM_KERN_ERR " additional syscall " + "data:"); + if (n % 4 == 1) + printk("\n" UM_KERN_ERR " "); + printk(" 0x%lx", data[n]); + } + if (n > 1) + printk("\n"); + } + else ret = 0; + + *addr = check_init_stack(mm_idp, NULL); + + return ret; +} + +long run_syscall_stub(struct mm_id * mm_idp, int syscall, + unsigned long *args, long expected, void **addr, + int done) +{ + unsigned long *stack = check_init_stack(mm_idp, *addr); + + if (done && *addr == NULL) + single_count++; + + *stack += sizeof(long); + stack += *stack / sizeof(long); + + *stack++ = syscall; + *stack++ = args[0]; + *stack++ = args[1]; + *stack++ = args[2]; + *stack++ = args[3]; + *stack++ = args[4]; + *stack++ = args[5]; + *stack++ = expected; + *stack = 0; + multi_op_count++; + + if (!done && ((((unsigned long) stack) & ~UM_KERN_PAGE_MASK) < + UM_KERN_PAGE_SIZE - 10 * sizeof(long))) { + *addr = stack; + return 0; + } + + return do_syscall_stub(mm_idp, addr); +} + +long syscall_stub_data(struct mm_id * mm_idp, + unsigned long *data, int data_count, + void **addr, void **stub_addr) +{ + unsigned long *stack; + int ret = 0; + + /* + * If *addr still is uninitialized, it *must* contain NULL. + * Thus in this case do_syscall_stub correctly won't be called. 
+ */ + if ((((unsigned long) *addr) & ~UM_KERN_PAGE_MASK) >= + UM_KERN_PAGE_SIZE - (10 + data_count) * sizeof(long)) { + ret = do_syscall_stub(mm_idp, addr); + /* in case of error, don't overwrite data on stack */ + if (ret) + return ret; + } + + stack = check_init_stack(mm_idp, *addr); + *addr = stack; + + *stack = data_count * sizeof(long); + + memcpy(stack + 1, data, data_count * sizeof(long)); + + *stub_addr = (void *)(((unsigned long)(stack + 1) & + ~UM_KERN_PAGE_MASK) + STUB_DATA); + + return 0; +} + +int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, int prot, + int phys_fd, unsigned long long offset, int done, void **data) +{ + int ret; + + if (proc_mm) { + struct proc_mm_op map; + int fd = mm_idp->u.mm_fd; + + map = ((struct proc_mm_op) { .op = MM_MMAP, + .u = + { .mmap = + { .addr = virt, + .len = len, + .prot = prot, + .flags = MAP_SHARED | + MAP_FIXED, + .fd = phys_fd, + .offset= offset + } } } ); + CATCH_EINTR(ret = write(fd, &map, sizeof(map))); + if (ret != sizeof(map)) { + ret = -errno; + printk(UM_KERN_ERR "map : /proc/mm map failed, " + "err = %d\n", -ret); + } + else ret = 0; + } + else { + unsigned long args[] = { virt, len, prot, + MAP_SHARED | MAP_FIXED, phys_fd, + MMAP_OFFSET(offset) }; + + ret = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, virt, + data, done); + } + + return ret; +} + +int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len, + int done, void **data) +{ + int ret; + + if (proc_mm) { + struct proc_mm_op unmap; + int fd = mm_idp->u.mm_fd; + + unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, + .u = + { .munmap = + { .addr = + (unsigned long) addr, + .len = len } } } ); + CATCH_EINTR(ret = write(fd, &unmap, sizeof(unmap))); + if (ret != sizeof(unmap)) { + ret = -errno; + printk(UM_KERN_ERR "unmap - proc_mm write returned " + "%d\n", ret); + } + else ret = 0; + } + else { + unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0, + 0 }; + + ret = run_syscall_stub(mm_idp, __NR_munmap, args, 0, + 
data, done); + } + + return ret; +} + +int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len, + unsigned int prot, int done, void **data) +{ + struct proc_mm_op protect; + int ret; + + if (proc_mm) { + int fd = mm_idp->u.mm_fd; + + protect = ((struct proc_mm_op) { .op = MM_MPROTECT, + .u = + { .mprotect = + { .addr = + (unsigned long) addr, + .len = len, + .prot = prot } } } ); + + CATCH_EINTR(ret = write(fd, &protect, sizeof(protect))); + if (ret != sizeof(protect)) { + ret = -errno; + printk(UM_KERN_ERR "protect failed, err = %d", -ret); + } + else ret = 0; + } + else { + unsigned long args[] = { addr, len, prot, 0, 0, 0 }; + + ret = run_syscall_stub(mm_idp, __NR_mprotect, args, 0, + data, done); + } + + return ret; +} diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c new file mode 100644 index 0000000..d6e0a22 --- /dev/null +++ b/arch/um/os-Linux/skas/process.c @@ -0,0 +1,728 @@ +/* + * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdlib.h> +#include <unistd.h> +#include <sched.h> +#include <errno.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/ptrace.h> +#include <sys/wait.h> +#include <asm/unistd.h> +#include "as-layout.h" +#include "chan_user.h" +#include "kern_constants.h" +#include "kern_util.h" +#include "mem.h" +#include "os.h" +#include "process.h" +#include "proc_mm.h" +#include "ptrace_user.h" +#include "registers.h" +#include "skas.h" +#include "skas_ptrace.h" +#include "user.h" +#include "sysdep/stub.h" + +int is_skas_winch(int pid, int fd, void *data) +{ + if (pid != getpgrp()) + return 0; + + register_winch_irq(-1, fd, -1, data, 0); + return 1; +} + +static int ptrace_dump_regs(int pid) +{ + unsigned long regs[MAX_REG_NR]; + int i; + + if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0) + return -errno; + + printk(UM_KERN_ERR "Stub registers -\n"); + for (i = 0; i < ARRAY_SIZE(regs); i++) + printk(UM_KERN_ERR "\t%d - %lx\n", 
i, regs[i]); + + return 0; +} + +/* + * Signals that are OK to receive in the stub - we'll just continue it. + * SIGWINCH will happen when UML is inside a detached screen. + */ +#define STUB_SIG_MASK ((1 << SIGVTALRM) | (1 << SIGWINCH)) + +/* Signals that the stub will finish with - anything else is an error */ +#define STUB_DONE_MASK (1 << SIGTRAP) + +void wait_stub_done(int pid) +{ + int n, status, err; + + while (1) { + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); + if ((n < 0) || !WIFSTOPPED(status)) + goto bad_wait; + + if (((1 << WSTOPSIG(status)) & STUB_SIG_MASK) == 0) + break; + + err = ptrace(PTRACE_CONT, pid, 0, 0); + if (err) { + printk(UM_KERN_ERR "wait_stub_done : continue failed, " + "errno = %d\n", errno); + fatal_sigsegv(); + } + } + + if (((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0) + return; + +bad_wait: + err = ptrace_dump_regs(pid); + if (err) + printk(UM_KERN_ERR "Failed to get registers from stub, " + "errno = %d\n", -err); + printk(UM_KERN_ERR "wait_stub_done : failed to wait for SIGTRAP, " + "pid = %d, n = %d, errno = %d, status = 0x%x\n", pid, n, errno, + status); + fatal_sigsegv(); +} + +extern unsigned long current_stub_stack(void); + +static void get_skas_faultinfo(int pid, struct faultinfo *fi) +{ + int err; + + if (ptrace_faultinfo) { + err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); + if (err) { + printk(UM_KERN_ERR "get_skas_faultinfo - " + "PTRACE_FAULTINFO failed, errno = %d\n", errno); + fatal_sigsegv(); + } + + /* Special handling for i386, which has different structs */ + if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo)) + memset((char *)fi + sizeof(struct ptrace_faultinfo), 0, + sizeof(struct faultinfo) - + sizeof(struct ptrace_faultinfo)); + } + else { + unsigned long fpregs[FP_SIZE]; + + err = get_fp_registers(pid, fpregs); + if (err < 0) { + printk(UM_KERN_ERR "save_fp_registers returned %d\n", + err); + fatal_sigsegv(); + } + err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV); + if (err) { + 
printk(UM_KERN_ERR "Failed to continue stub, pid = %d, " + "errno = %d\n", pid, errno); + fatal_sigsegv(); + } + wait_stub_done(pid); + + /* + * faultinfo is prepared by the stub-segv-handler at start of + * the stub stack page. We just have to copy it. + */ + memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); + + err = put_fp_registers(pid, fpregs); + if (err < 0) { + printk(UM_KERN_ERR "put_fp_registers returned %d\n", + err); + fatal_sigsegv(); + } + } +} + +static void handle_segv(int pid, struct uml_pt_regs * regs) +{ + get_skas_faultinfo(pid, &regs->faultinfo); + segv(regs->faultinfo, 0, 1, NULL); +} + +/* + * To use the same value of using_sysemu as the caller, ask it that value + * (in local_using_sysemu + */ +static void handle_trap(int pid, struct uml_pt_regs *regs, + int local_using_sysemu) +{ + int err, status; + + if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END)) + fatal_sigsegv(); + + /* Mark this as a syscall */ + UPT_SYSCALL_NR(regs) = PT_SYSCALL_NR(regs->gp); + + if (!local_using_sysemu) + { + err = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET, + __NR_getpid); + if (err < 0) { + printk(UM_KERN_ERR "handle_trap - nullifying syscall " + "failed, errno = %d\n", errno); + fatal_sigsegv(); + } + + err = ptrace(PTRACE_SYSCALL, pid, 0, 0); + if (err < 0) { + printk(UM_KERN_ERR "handle_trap - continuing to end of " + "syscall failed, errno = %d\n", errno); + fatal_sigsegv(); + } + + CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); + if ((err < 0) || !WIFSTOPPED(status) || + (WSTOPSIG(status) != SIGTRAP + 0x80)) { + err = ptrace_dump_regs(pid); + if (err) + printk(UM_KERN_ERR "Failed to get registers " + "from process, errno = %d\n", -err); + printk(UM_KERN_ERR "handle_trap - failed to wait at " + "end of syscall, errno = %d, status = %d\n", + errno, status); + fatal_sigsegv(); + } + } + + handle_syscall(regs); +} + +extern int __syscall_stub_start; + +static int userspace_tramp(void *stack) +{ + void *addr; + int err; + + 
ptrace(PTRACE_TRACEME, 0, 0, 0); + + signal(SIGTERM, SIG_DFL); + signal(SIGWINCH, SIG_IGN); + err = set_interval(); + if (err) { + printk(UM_KERN_ERR "userspace_tramp - setting timer failed, " + "errno = %d\n", err); + exit(1); + } + + if (!proc_mm) { + /* + * This has a pte, but it can't be mapped in with the usual + * tlb_flush mechanism because this is part of that mechanism + */ + int fd; + unsigned long long offset; + fd = phys_mapping(to_phys(&__syscall_stub_start), &offset); + addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE, + PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); + if (addr == MAP_FAILED) { + printk(UM_KERN_ERR "mapping mmap stub at 0x%lx failed, " + "errno = %d\n", STUB_CODE, errno); + exit(1); + } + + if (stack != NULL) { + fd = phys_mapping(to_phys(stack), &offset); + addr = mmap((void *) STUB_DATA, + UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED, fd, offset); + if (addr == MAP_FAILED) { + printk(UM_KERN_ERR "mapping segfault stack " + "at 0x%lx failed, errno = %d\n", + STUB_DATA, errno); + exit(1); + } + } + } + if (!ptrace_faultinfo && (stack != NULL)) { + struct sigaction sa; + + unsigned long v = STUB_CODE + + (unsigned long) stub_segv_handler - + (unsigned long) &__syscall_stub_start; + + set_sigstack((void *) STUB_DATA, UM_KERN_PAGE_SIZE); + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_ONSTACK | SA_NODEFER; + sa.sa_handler = (void *) v; + sa.sa_restorer = NULL; + if (sigaction(SIGSEGV, &sa, NULL) < 0) { + printk(UM_KERN_ERR "userspace_tramp - setting SIGSEGV " + "handler failed - errno = %d\n", errno); + exit(1); + } + } + + kill(os_getpid(), SIGSTOP); + return 0; +} + +/* Each element set once, and only accessed by a single processor anyway */ +#undef NR_CPUS +#define NR_CPUS 1 +int userspace_pid[NR_CPUS]; + +int start_userspace(unsigned long stub_stack) +{ + void *stack; + unsigned long sp; + int pid, status, n, flags, err; + + stack = mmap(NULL, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + 
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (stack == MAP_FAILED) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : mmap failed, " + "errno = %d\n", errno); + return err; + } + + sp = (unsigned long) stack + UM_KERN_PAGE_SIZE - sizeof(void *); + + flags = CLONE_FILES; + if (proc_mm) + flags |= CLONE_VM; + else + flags |= SIGCHLD; + + pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); + if (pid < 0) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : clone failed, " + "errno = %d\n", errno); + return err; + } + + do { + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); + if (n < 0) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : wait failed, " + "errno = %d\n", errno); + goto out_kill; + } + } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGVTALRM)); + + if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { + err = -EINVAL; + printk(UM_KERN_ERR "start_userspace : expected SIGSTOP, got " + "status = %d\n", status); + goto out_kill; + } + + if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, + (void *) PTRACE_O_TRACESYSGOOD) < 0) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : PTRACE_OLDSETOPTIONS " + "failed, errno = %d\n", errno); + goto out_kill; + } + + if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : munmap failed, " + "errno = %d\n", errno); + goto out_kill; + } + + return pid; + + out_kill: + os_kill_ptraced_process(pid, 1); + return err; +} + +void userspace(struct uml_pt_regs *regs) +{ + struct itimerval timer; + unsigned long long nsecs, now; + int err, status, op, pid = userspace_pid[0]; + /* To prevent races if using_sysemu changes under us.*/ + int local_using_sysemu; + + if (getitimer(ITIMER_VIRTUAL, &timer)) + printk(UM_KERN_ERR "Failed to get itimer, errno = %d\n", errno); + nsecs = timer.it_value.tv_sec * UM_NSEC_PER_SEC + + timer.it_value.tv_usec * UM_NSEC_PER_USEC; + nsecs += os_nsecs(); + + while (1) { + /* + * This can legitimately 
fail if the process loads a + * bogus value into a segment register. It will + * segfault and PTRACE_GETREGS will read that value + * out of the process. However, PTRACE_SETREGS will + * fail. In this case, there is nothing to do but + * just kill the process. + */ + if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) + fatal_sigsegv(); + + /* Now we set local_using_sysemu to be used for one loop */ + local_using_sysemu = get_using_sysemu(); + + op = SELECT_PTRACE_OPERATION(local_using_sysemu, + singlestepping(NULL)); + + if (ptrace(op, pid, 0, 0)) { + printk(UM_KERN_ERR "userspace - ptrace continue " + "failed, op = %d, errno = %d\n", op, errno); + fatal_sigsegv(); + } + + CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); + if (err < 0) { + printk(UM_KERN_ERR "userspace - wait failed, " + "errno = %d\n", errno); + fatal_sigsegv(); + } + + regs->is_user = 1; + if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) { + printk(UM_KERN_ERR "userspace - PTRACE_GETREGS failed, " + "errno = %d\n", errno); + fatal_sigsegv(); + } + + UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ + + if (WIFSTOPPED(status)) { + int sig = WSTOPSIG(status); + switch (sig) { + case SIGSEGV: + if (PTRACE_FULL_FAULTINFO || + !ptrace_faultinfo) { + get_skas_faultinfo(pid, + &regs->faultinfo); + (*sig_info[SIGSEGV])(SIGSEGV, regs); + } + else handle_segv(pid, regs); + break; + case SIGTRAP + 0x80: + handle_trap(pid, regs, local_using_sysemu); + break; + case SIGTRAP: + relay_signal(SIGTRAP, regs); + break; + case SIGVTALRM: + now = os_nsecs(); + if (now < nsecs) + break; + block_signals(); + (*sig_info[sig])(sig, regs); + unblock_signals(); + nsecs = timer.it_value.tv_sec * + UM_NSEC_PER_SEC + + timer.it_value.tv_usec * + UM_NSEC_PER_USEC; + nsecs += os_nsecs(); + break; + case SIGIO: + case SIGILL: + case SIGBUS: + case SIGFPE: + case SIGWINCH: + block_signals(); + (*sig_info[sig])(sig, regs); + unblock_signals(); + break; + default: + printk(UM_KERN_ERR "userspace - child stopped " + 
"with signal %d\n", sig); + fatal_sigsegv(); + } + pid = userspace_pid[0]; + interrupt_end(); + + /* Avoid -ERESTARTSYS handling in host */ + if (PT_SYSCALL_NR_OFFSET != PT_SYSCALL_RET_OFFSET) + PT_SYSCALL_NR(regs->gp) = -1; + } + } +} + +static unsigned long thread_regs[MAX_REG_NR]; + +static int __init init_thread_regs(void) +{ + get_safe_registers(thread_regs); + /* Set parent's instruction pointer to start of clone-stub */ + thread_regs[REGS_IP_INDEX] = STUB_CODE + + (unsigned long) stub_clone_handler - + (unsigned long) &__syscall_stub_start; + thread_regs[REGS_SP_INDEX] = STUB_DATA + UM_KERN_PAGE_SIZE - + sizeof(void *); +#ifdef __SIGNAL_FRAMESIZE + thread_regs[REGS_SP_INDEX] -= __SIGNAL_FRAMESIZE; +#endif + return 0; +} + +__initcall(init_thread_regs); + +int copy_context_skas0(unsigned long new_stack, int pid) +{ + struct timeval tv = { .tv_sec = 0, .tv_usec = UM_USEC_PER_SEC / UM_HZ }; + int err; + unsigned long current_stack = current_stub_stack(); + struct stub_data *data = (struct stub_data *) current_stack; + struct stub_data *child_data = (struct stub_data *) new_stack; + unsigned long long new_offset; + int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset); + + /* + * prepare offset and fd of child's stack as argument for parent's + * and child's mmap2 calls + */ + *data = ((struct stub_data) { .offset = MMAP_OFFSET(new_offset), + .fd = new_fd, + .timer = ((struct itimerval) + { .it_value = tv, + .it_interval = tv }) }); + + err = ptrace_setregs(pid, thread_regs); + if (err < 0) { + err = -errno; + printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_SETREGS " + "failed, pid = %d, errno = %d\n", pid, -err); + return err; + } + + /* set a well known return code for detection of child write failure */ + child_data->err = 12345678; + + /* + * Wait, until parent has finished its work: read child's pid from + * parent's stack, and check, if bad result. 
+ */ + err = ptrace(PTRACE_CONT, pid, 0, 0); + if (err) { + err = -errno; + printk(UM_KERN_ERR "Failed to continue new process, pid = %d, " + "errno = %d\n", pid, errno); + return err; + } + + wait_stub_done(pid); + + pid = data->err; + if (pid < 0) { + printk(UM_KERN_ERR "copy_context_skas0 - stub-parent reports " + "error %d\n", -pid); + return pid; + } + + /* + * Wait, until child has finished too: read child's result from + * child's stack and check it. + */ + wait_stub_done(pid); + if (child_data->err != STUB_DATA) { + printk(UM_KERN_ERR "copy_context_skas0 - stub-child reports " + "error %ld\n", child_data->err); + err = child_data->err; + goto out_kill; + } + + if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, + (void *)PTRACE_O_TRACESYSGOOD) < 0) { + err = -errno; + printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_OLDSETOPTIONS " + "failed, errno = %d\n", errno); + goto out_kill; + } + + return pid; + + out_kill: + os_kill_ptraced_process(pid, 1); + return err; +} + +/* + * This is used only, if stub pages are needed, while proc_mm is + * available. Opening /proc/mm creates a new mm_context, which lacks + * the stub-pages. 
Thus, we map them using /proc/mm-fd + */ +int map_stub_pages(int fd, unsigned long code, unsigned long data, + unsigned long stack) +{ + struct proc_mm_op mmop; + int n; + unsigned long long code_offset; + int code_fd = phys_mapping(to_phys((void *) &__syscall_stub_start), + &code_offset); + + mmop = ((struct proc_mm_op) { .op = MM_MMAP, + .u = + { .mmap = + { .addr = code, + .len = UM_KERN_PAGE_SIZE, + .prot = PROT_EXEC, + .flags = MAP_FIXED | MAP_PRIVATE, + .fd = code_fd, + .offset = code_offset + } } }); + CATCH_EINTR(n = write(fd, &mmop, sizeof(mmop))); + if (n != sizeof(mmop)) { + n = errno; + printk(UM_KERN_ERR "mmap args - addr = 0x%lx, fd = %d, " + "offset = %llx\n", code, code_fd, + (unsigned long long) code_offset); + printk(UM_KERN_ERR "map_stub_pages : /proc/mm map for code " + "failed, err = %d\n", n); + return -n; + } + + if (stack) { + unsigned long long map_offset; + int map_fd = phys_mapping(to_phys((void *)stack), &map_offset); + mmop = ((struct proc_mm_op) + { .op = MM_MMAP, + .u = + { .mmap = + { .addr = data, + .len = UM_KERN_PAGE_SIZE, + .prot = PROT_READ | PROT_WRITE, + .flags = MAP_FIXED | MAP_SHARED, + .fd = map_fd, + .offset = map_offset + } } }); + CATCH_EINTR(n = write(fd, &mmop, sizeof(mmop))); + if (n != sizeof(mmop)) { + n = errno; + printk(UM_KERN_ERR "map_stub_pages : /proc/mm map for " + "data failed, err = %d\n", n); + return -n; + } + } + + return 0; +} + +void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)) +{ + (*buf)[0].JB_IP = (unsigned long) handler; + (*buf)[0].JB_SP = (unsigned long) stack + UM_THREAD_SIZE - + sizeof(void *); +} + +#define INIT_JMP_NEW_THREAD 0 +#define INIT_JMP_CALLBACK 1 +#define INIT_JMP_HALT 2 +#define INIT_JMP_REBOOT 3 + +void switch_threads(jmp_buf *me, jmp_buf *you) +{ + if (UML_SETJMP(me) == 0) + UML_LONGJMP(you, 1); +} + +static jmp_buf initial_jmpbuf; + +/* XXX Make these percpu */ +static void (*cb_proc)(void *arg); +static void *cb_arg; +static jmp_buf *cb_back; + +int 
start_idle_thread(void *stack, jmp_buf *switch_buf) +{ + int n; + + set_handler(SIGWINCH, (__sighandler_t) sig_handler, + SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGVTALRM, -1); + + /* + * Can't use UML_SETJMP or UML_LONGJMP here because they save + * and restore signals, with the possible side-effect of + * trying to handle any signals which came when they were + * blocked, which can't be done on this stack. + * Signals must be blocked when jumping back here and restored + * after returning to the jumper. + */ + n = setjmp(initial_jmpbuf); + switch (n) { + case INIT_JMP_NEW_THREAD: + (*switch_buf)[0].JB_IP = (unsigned long) new_thread_handler; + (*switch_buf)[0].JB_SP = (unsigned long) stack + + UM_THREAD_SIZE - sizeof(void *); + break; + case INIT_JMP_CALLBACK: + (*cb_proc)(cb_arg); + longjmp(*cb_back, 1); + break; + case INIT_JMP_HALT: + kmalloc_ok = 0; + return 0; + case INIT_JMP_REBOOT: + kmalloc_ok = 0; + return 1; + default: + printk(UM_KERN_ERR "Bad sigsetjmp return in " + "start_idle_thread - %d\n", n); + fatal_sigsegv(); + } + longjmp(*switch_buf, 1); +} + +void initial_thread_cb_skas(void (*proc)(void *), void *arg) +{ + jmp_buf here; + + cb_proc = proc; + cb_arg = arg; + cb_back = &here; + + block_signals(); + if (UML_SETJMP(&here) == 0) + UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK); + unblock_signals(); + + cb_proc = NULL; + cb_arg = NULL; + cb_back = NULL; +} + +void halt_skas(void) +{ + block_signals(); + UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT); +} + +void reboot_skas(void) +{ + block_signals(); + UML_LONGJMP(&initial_jmpbuf, INIT_JMP_REBOOT); +} + +void __switch_mm(struct mm_id *mm_idp) +{ + int err; + + /* FIXME: need cpu pid in __switch_mm */ + if (proc_mm) { + err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, + mm_idp->u.mm_fd); + if (err) { + printk(UM_KERN_ERR "__switch_mm - PTRACE_SWITCH_MM " + "failed, errno = %d\n", errno); + fatal_sigsegv(); + } + } + else userspace_pid[0] = mm_idp->u.pid; +} diff --git 
a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c new file mode 100644 index 0000000..183db26 --- /dev/null +++ b/arch/um/os-Linux/start_up.c @@ -0,0 +1,542 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <signal.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/ptrace.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <asm/unistd.h> +#include "init.h" +#include "kern_constants.h" +#include "os.h" +#include "mem_user.h" +#include "ptrace_user.h" +#include "registers.h" +#include "skas.h" +#include "skas_ptrace.h" + +static void ptrace_child(void) +{ + int ret; + /* Calling os_getpid because some libcs cached getpid incorrectly */ + int pid = os_getpid(), ppid = getppid(); + int sc_result; + + if (change_sig(SIGWINCH, 0) < 0 || + ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) { + perror("ptrace"); + kill(pid, SIGKILL); + } + kill(pid, SIGSTOP); + + /* + * This syscall will be intercepted by the parent. Don't call more than + * once, please. + */ + sc_result = os_getpid(); + + if (sc_result == pid) + /* Nothing modified by the parent, we are running normally. */ + ret = 1; + else if (sc_result == ppid) + /* + * Expected in check_ptrace and check_sysemu when they succeed + * in modifying the stack frame + */ + ret = 0; + else + /* Serious trouble! This could be caused by a bug in host 2.6 + * SKAS3/2.6 patch before release -V6, together with a bug in + * the UML code itself. + */ + ret = 2; + + exit(ret); +} + +static void fatal_perror(const char *str) +{ + perror(str); + exit(1); +} + +static void fatal(char *fmt, ...) +{ + va_list list; + + va_start(list, fmt); + vfprintf(stderr, fmt, list); + va_end(list); + + exit(1); +} + +static void non_fatal(char *fmt, ...) 
+{ + va_list list; + + va_start(list, fmt); + vfprintf(stderr, fmt, list); + va_end(list); +} + +static int start_ptraced_child(void) +{ + int pid, n, status; + + pid = fork(); + if (pid == 0) + ptrace_child(); + else if (pid < 0) + fatal_perror("start_ptraced_child : fork failed"); + + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if (n < 0) + fatal_perror("check_ptrace : waitpid failed"); + if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) + fatal("check_ptrace : expected SIGSTOP, got status = %d", + status); + + return pid; +} + +/* When testing for SYSEMU support, if it is one of the broken versions, we + * must just avoid using sysemu, not panic, but only if SYSEMU features are + * broken. + * So only for SYSEMU features we test mustexit, while normal host features + * must work anyway! + */ +static int stop_ptraced_child(int pid, int exitcode, int mustexit) +{ + int status, n, ret = 0; + + if (ptrace(PTRACE_CONT, pid, 0, 0) < 0) { + perror("stop_ptraced_child : ptrace failed"); + return -1; + } + CATCH_EINTR(n = waitpid(pid, &status, 0)); + if (!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) { + int exit_with = WEXITSTATUS(status); + if (exit_with == 2) + non_fatal("check_ptrace : child exited with status 2. 
" + "\nDisabling SYSEMU support.\n"); + non_fatal("check_ptrace : child exited with exitcode %d, while " + "expecting %d; status 0x%x\n", exit_with, + exitcode, status); + if (mustexit) + exit(1); + ret = -1; + } + + return ret; +} + +/* Changed only during early boot */ +int ptrace_faultinfo; +static int disable_ptrace_faultinfo; + +int ptrace_ldt; +static int disable_ptrace_ldt; + +int proc_mm; +static int disable_proc_mm; + +int have_switch_mm; +static int disable_switch_mm; + +int skas_needs_stub; + +static int __init skas0_cmd_param(char *str, int* add) +{ + disable_ptrace_faultinfo = 1; + disable_ptrace_ldt = 1; + disable_proc_mm = 1; + disable_switch_mm = 1; + + return 0; +} + +/* The two __uml_setup would conflict, without this stupid alias. */ + +static int __init mode_skas0_cmd_param(char *str, int* add) + __attribute__((alias("skas0_cmd_param"))); + +__uml_setup("skas0", skas0_cmd_param, +"skas0\n" +" Disables SKAS3 and SKAS4 usage, so that SKAS0 is used\n\n"); + +__uml_setup("mode=skas0", mode_skas0_cmd_param, +"mode=skas0\n" +" Disables SKAS3 and SKAS4 usage, so that SKAS0 is used.\n\n"); + +/* Changed only during early boot */ +static int force_sysemu_disabled = 0; + +static int __init nosysemu_cmd_param(char *str, int* add) +{ + force_sysemu_disabled = 1; + return 0; +} + +__uml_setup("nosysemu", nosysemu_cmd_param, +"nosysemu\n" +" Turns off syscall emulation patch for ptrace (SYSEMU) on.\n" +" SYSEMU is a performance-patch introduced by Laurent Vivier. 
It changes\n" +" behaviour of ptrace() and helps reducing host context switch rate.\n" +" To make it working, you need a kernel patch for your host, too.\n" +" See http://perso.wanadoo.fr/laurent.vivier/UML/ for further \n" +" information.\n\n"); + +static void __init check_sysemu(void) +{ + unsigned long regs[MAX_REG_NR]; + int pid, n, status, count=0; + + non_fatal("Checking syscall emulation patch for ptrace..."); + sysemu_supported = 0; + pid = start_ptraced_child(); + + if (ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0) + goto fail; + + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if (n < 0) + fatal_perror("check_sysemu : wait failed"); + if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) + fatal("check_sysemu : expected SIGTRAP, got status = %d\n", + status); + + if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0) + fatal_perror("check_sysemu : PTRACE_GETREGS failed"); + if (PT_SYSCALL_NR(regs) != __NR_getpid) { + non_fatal("check_sysemu got system call number %d, " + "expected %d...", PT_SYSCALL_NR(regs), __NR_getpid); + goto fail; + } + + n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, os_getpid()); + if (n < 0) { + non_fatal("check_sysemu : failed to modify system call " + "return"); + goto fail; + } + + if (stop_ptraced_child(pid, 0, 0) < 0) + goto fail_stopped; + + sysemu_supported = 1; + non_fatal("OK\n"); + set_using_sysemu(!force_sysemu_disabled); + + non_fatal("Checking advanced syscall emulation patch for ptrace..."); + pid = start_ptraced_child(); + + if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0, + (void *) PTRACE_O_TRACESYSGOOD) < 0)) + fatal_perror("check_ptrace: PTRACE_OLDSETOPTIONS failed"); + + while (1) { + count++; + if (ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0) + goto fail; + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if (n < 0) + fatal_perror("check_ptrace : wait failed"); + + if (WIFSTOPPED(status) && + (WSTOPSIG(status) == (SIGTRAP|0x80))) { + if (!count) { + non_fatal("check_ptrace : SYSEMU_SINGLESTEP " + "doesn't 
singlestep"); + goto fail; + } + n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, + os_getpid()); + if (n < 0) + fatal_perror("check_sysemu : failed to modify " + "system call return"); + break; + } + else if (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGTRAP)) + count++; + else { + non_fatal("check_ptrace : expected SIGTRAP or " + "(SIGTRAP | 0x80), got status = %d\n", + status); + goto fail; + } + } + if (stop_ptraced_child(pid, 0, 0) < 0) + goto fail_stopped; + + sysemu_supported = 2; + non_fatal("OK\n"); + + if (!force_sysemu_disabled) + set_using_sysemu(sysemu_supported); + return; + +fail: + stop_ptraced_child(pid, 1, 0); +fail_stopped: + non_fatal("missing\n"); +} + +static void __init check_ptrace(void) +{ + int pid, syscall, n, status; + + non_fatal("Checking that ptrace can change system call numbers..."); + pid = start_ptraced_child(); + + if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0, + (void *) PTRACE_O_TRACESYSGOOD) < 0)) + fatal_perror("check_ptrace: PTRACE_OLDSETOPTIONS failed"); + + while (1) { + if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) + fatal_perror("check_ptrace : ptrace failed"); + + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + if (n < 0) + fatal_perror("check_ptrace : wait failed"); + + if (!WIFSTOPPED(status) || + (WSTOPSIG(status) != (SIGTRAP | 0x80))) + fatal("check_ptrace : expected (SIGTRAP|0x80), " + "got status = %d", status); + + syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET, + 0); + if (syscall == __NR_getpid) { + n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET, + __NR_getppid); + if (n < 0) + fatal_perror("check_ptrace : failed to modify " + "system call"); + break; + } + } + stop_ptraced_child(pid, 0, 1); + non_fatal("OK\n"); + check_sysemu(); +} + +extern void check_tmpexec(void); + +static void __init check_coredump_limit(void) +{ + struct rlimit lim; + int err = getrlimit(RLIMIT_CORE, &lim); + + if (err) { + perror("Getting core dump limit"); + return; + } + + printf("Core dump limits :\n\tsoft - "); 
+ if (lim.rlim_cur == RLIM_INFINITY) + printf("NONE\n"); + else printf("%lu\n", lim.rlim_cur); + + printf("\thard - "); + if (lim.rlim_max == RLIM_INFINITY) + printf("NONE\n"); + else printf("%lu\n", lim.rlim_max); +} + +void __init os_early_checks(void) +{ + int pid; + + /* Print out the core dump limits early */ + check_coredump_limit(); + + check_ptrace(); + + /* Need to check this early because mmapping happens before the + * kernel is running. + */ + check_tmpexec(); + + pid = start_ptraced_child(); + if (init_registers(pid)) + fatal("Failed to initialize default registers"); + stop_ptraced_child(pid, 1, 1); +} + +static int __init noprocmm_cmd_param(char *str, int* add) +{ + disable_proc_mm = 1; + return 0; +} + +__uml_setup("noprocmm", noprocmm_cmd_param, +"noprocmm\n" +" Turns off usage of /proc/mm, even if host supports it.\n" +" To support /proc/mm, the host needs to be patched using\n" +" the current skas3 patch.\n\n"); + +static int __init noptracefaultinfo_cmd_param(char *str, int* add) +{ + disable_ptrace_faultinfo = 1; + return 0; +} + +__uml_setup("noptracefaultinfo", noptracefaultinfo_cmd_param, +"noptracefaultinfo\n" +" Turns off usage of PTRACE_FAULTINFO, even if host supports\n" +" it. 
To support PTRACE_FAULTINFO, the host needs to be patched\n" +" using the current skas3 patch.\n\n"); + +static int __init noptraceldt_cmd_param(char *str, int* add) +{ + disable_ptrace_ldt = 1; + return 0; +} + +__uml_setup("noptraceldt", noptraceldt_cmd_param, +"noptraceldt\n" +" Turns off usage of PTRACE_LDT, even if host supports it.\n" +" To support PTRACE_LDT, the host needs to be patched using\n" +" the current skas3 patch.\n\n"); + +static inline void check_skas3_ptrace_faultinfo(void) +{ + struct ptrace_faultinfo fi; + int pid, n; + + non_fatal(" - PTRACE_FAULTINFO..."); + pid = start_ptraced_child(); + + n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi); + if (n < 0) { + if (errno == EIO) + non_fatal("not found\n"); + else + perror("not found"); + } else if (disable_ptrace_faultinfo) + non_fatal("found but disabled on command line\n"); + else { + ptrace_faultinfo = 1; + non_fatal("found\n"); + } + + stop_ptraced_child(pid, 1, 1); +} + +static inline void check_skas3_ptrace_ldt(void) +{ +#ifdef PTRACE_LDT + int pid, n; + unsigned char ldtbuf[40]; + struct ptrace_ldt ldt_op = (struct ptrace_ldt) { + .func = 2, /* read default ldt */ + .ptr = ldtbuf, + .bytecount = sizeof(ldtbuf)}; + + non_fatal(" - PTRACE_LDT..."); + pid = start_ptraced_child(); + + n = ptrace(PTRACE_LDT, pid, 0, (unsigned long) &ldt_op); + if (n < 0) { + if (errno == EIO) + non_fatal("not found\n"); + else + perror("not found"); + } else if (disable_ptrace_ldt) + non_fatal("found, but use is disabled\n"); + else { + ptrace_ldt = 1; + non_fatal("found\n"); + } + + stop_ptraced_child(pid, 1, 1); +#endif +} + +static inline void check_skas3_proc_mm(void) +{ + non_fatal(" - /proc/mm..."); + if (access("/proc/mm", W_OK) < 0) + perror("not found"); + else if (disable_proc_mm) + non_fatal("found but disabled on command line\n"); + else { + proc_mm = 1; + non_fatal("found\n"); + } +} + +void can_do_skas(void) +{ + non_fatal("Checking for the skas3 patch in the host:\n"); + + check_skas3_proc_mm(); + 
check_skas3_ptrace_faultinfo(); + check_skas3_ptrace_ldt(); + + if (!proc_mm || !ptrace_faultinfo || !ptrace_ldt) + skas_needs_stub = 1; +} + +int __init parse_iomem(char *str, int *add) +{ + struct iomem_region *new; + struct stat64 buf; + char *file, *driver; + int fd, size; + + driver = str; + file = strchr(str,','); + if (file == NULL) { + fprintf(stderr, "parse_iomem : failed to parse iomem\n"); + goto out; + } + *file = '\0'; + file++; + fd = open(file, O_RDWR, 0); + if (fd < 0) { + perror("parse_iomem - Couldn't open io file"); + goto out; + } + + if (fstat64(fd, &buf) < 0) { + perror("parse_iomem - cannot stat_fd file"); + goto out_close; + } + + new = malloc(sizeof(*new)); + if (new == NULL) { + perror("Couldn't allocate iomem_region struct"); + goto out_close; + } + + size = (buf.st_size + UM_KERN_PAGE_SIZE) & ~(UM_KERN_PAGE_SIZE - 1); + + *new = ((struct iomem_region) { .next = iomem_regions, + .driver = driver, + .fd = fd, + .size = size, + .phys = 0, + .virt = 0 }); + iomem_regions = new; + iomem_size += new->size + UM_KERN_PAGE_SIZE; + + return 0; + out_close: + close(fd); + out: + return 1; +} diff --git a/arch/um/os-Linux/sys-i386/Makefile b/arch/um/os-Linux/sys-i386/Makefile new file mode 100644 index 0000000..b4bc6ac --- /dev/null +++ b/arch/um/os-Linux/sys-i386/Makefile @@ -0,0 +1,10 @@ +# +# Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) +# Licensed under the GPL +# + +obj-y = registers.o signal.o task_size.o tls.o + +USER_OBJS := $(obj-y) + +include arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/um/os-Linux/sys-i386/registers.c new file mode 100644 index 0000000..229f7a5 --- /dev/null +++ b/arch/um/os-Linux/sys-i386/registers.c @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2004 PathScale, Inc + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <errno.h> +#include <sys/ptrace.h> +#include <sys/user.h> +#include 
"kern_constants.h" +#include "longjmp.h" +#include "user.h" +#include "sysdep/ptrace_user.h" + +int save_fp_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +int restore_fp_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +int save_fpx_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_GETFPXREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +int restore_fpx_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_SETFPXREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +unsigned long get_thread_reg(int reg, jmp_buf *buf) +{ + switch (reg) { + case EIP: + return buf[0]->__eip; + case UESP: + return buf[0]->__esp; + case EBP: + return buf[0]->__ebp; + default: + printk(UM_KERN_ERR "get_thread_regs - unknown register %d\n", + reg); + return 0; + } +} + +int have_fpx_regs = 1; + +int get_fp_registers(int pid, unsigned long *regs) +{ + if (have_fpx_regs) + return save_fpx_registers(pid, regs); + else + return save_fp_registers(pid, regs); +} + +int put_fp_registers(int pid, unsigned long *regs) +{ + if (have_fpx_regs) + return restore_fpx_registers(pid, regs); + else + return restore_fp_registers(pid, regs); +} + +void arch_init_registers(int pid) +{ + struct user_fpxregs_struct fpx_regs; + int err; + + err = ptrace(PTRACE_GETFPXREGS, pid, 0, &fpx_regs); + if (!err) + return; + + if (errno != EIO) + panic("check_ptrace : PTRACE_GETFPXREGS failed, errno = %d", + errno); + + have_fpx_regs = 0; +} diff --git a/arch/um/os-Linux/sys-i386/signal.c b/arch/um/os-Linux/sys-i386/signal.c new file mode 100644 index 0000000..f311609 --- /dev/null +++ b/arch/um/os-Linux/sys-i386/signal.c @@ -0,0 +1,13 @@ +/* + * Copyright (C) 2006 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <signal.h> + +extern void handle_signal(int sig, struct 
sigcontext *sc); + +void hard_handler(int sig) +{ + handle_signal(sig, (struct sigcontext *) (&sig + 1)); +} diff --git a/arch/um/os-Linux/sys-i386/task_size.c b/arch/um/os-Linux/sys-i386/task_size.c new file mode 100644 index 0000000..be04c1e --- /dev/null +++ b/arch/um/os-Linux/sys-i386/task_size.c @@ -0,0 +1,139 @@ +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/mman.h> +#include "longjmp.h" +#include "kern_constants.h" + +static jmp_buf buf; + +static void segfault(int sig) +{ + longjmp(buf, 1); +} + +static int page_ok(unsigned long page) +{ + unsigned long *address = (unsigned long *) (page << UM_KERN_PAGE_SHIFT); + unsigned long n = ~0UL; + void *mapped = NULL; + int ok = 0; + + /* + * First see if the page is readable. If it is, it may still + * be a VDSO, so we go on to see if it's writable. If not + * then try mapping memory there. If that fails, then we're + * still in the kernel area. As a sanity check, we'll fail if + * the mmap succeeds, but gives us an address different from + * what we wanted. + */ + if (setjmp(buf) == 0) + n = *address; + else { + mapped = mmap(address, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mapped == MAP_FAILED) + return 0; + if (mapped != address) + goto out; + } + + /* + * Now, is it writeable? If so, then we're in user address + * space. If not, then try mprotecting it and try the write + * again. + */ + if (setjmp(buf) == 0) { + *address = n; + ok = 1; + goto out; + } else if (mprotect(address, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE) != 0) + goto out; + + if (setjmp(buf) == 0) { + *address = n; + ok = 1; + } + + out: + if (mapped != NULL) + munmap(mapped, UM_KERN_PAGE_SIZE); + return ok; +} + +unsigned long os_get_top_address(void) +{ + struct sigaction sa, old; + unsigned long bottom = 0; + /* + * A 32-bit UML on a 64-bit host gets confused about the VDSO at + * 0xffffe000. 
It is mapped, is readable, can be reprotected writeable + * and written. However, exec discovers later that it can't be + * unmapped. So, just set the highest address to be checked to just + * below it. This might waste some address space on 4G/4G 32-bit + * hosts, but shouldn't hurt otherwise. + */ + unsigned long top = 0xffffd000 >> UM_KERN_PAGE_SHIFT; + unsigned long test, original; + + printf("Locating the bottom of the address space ... "); + fflush(stdout); + + /* + * We're going to be longjmping out of the signal handler, so + * SA_NODEFER needs to be set. + */ + sa.sa_handler = segfault; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_NODEFER; + if (sigaction(SIGSEGV, &sa, &old)) { + perror("os_get_top_address"); + exit(1); + } + + /* Manually scan the address space, bottom-up, until we find + * the first valid page (or run out of them). + */ + for (bottom = 0; bottom < top; bottom++) { + if (page_ok(bottom)) + break; + } + + /* If we've got this far, we ran out of pages. */ + if (bottom == top) { + fprintf(stderr, "Unable to determine bottom of address " + "space.\n"); + exit(1); + } + + printf("0x%x\n", bottom << UM_KERN_PAGE_SHIFT); + printf("Locating the top of the address space ... 
"); + fflush(stdout); + + original = bottom; + + /* This could happen with a 4G/4G split */ + if (page_ok(top)) + goto out; + + do { + test = bottom + (top - bottom) / 2; + if (page_ok(test)) + bottom = test; + else + top = test; + } while (top - bottom > 1); + +out: + /* Restore the old SIGSEGV handling */ + if (sigaction(SIGSEGV, &old, NULL)) { + perror("os_get_top_address"); + exit(1); + } + top <<= UM_KERN_PAGE_SHIFT; + printf("0x%x\n", top); + + return top; +} diff --git a/arch/um/os-Linux/sys-i386/tls.c b/arch/um/os-Linux/sys-i386/tls.c new file mode 100644 index 0000000..32ed41e --- /dev/null +++ b/arch/um/os-Linux/sys-i386/tls.c @@ -0,0 +1,36 @@ +#include <errno.h> +#include <linux/unistd.h> + +#include <sys/syscall.h> +#include <unistd.h> + +#include "sysdep/tls.h" +#include "user.h" + +/* Checks whether host supports TLS, and sets *tls_min according to the value + * valid on the host. + * i386 host have it == 6; x86_64 host have it == 12, for i386 emulation. */ +void check_host_supports_tls(int *supports_tls, int *tls_min) { + /* Values for x86 and x86_64.*/ + int val[] = {GDT_ENTRY_TLS_MIN_I386, GDT_ENTRY_TLS_MIN_X86_64}; + int i; + + for (i = 0; i < ARRAY_SIZE(val); i++) { + user_desc_t info; + info.entry_number = val[i]; + + if (syscall(__NR_get_thread_area, &info) == 0) { + *tls_min = val[i]; + *supports_tls = 1; + return; + } else { + if (errno == EINVAL) + continue; + else if (errno == ENOSYS) + *supports_tls = 0; + return; + } + } + + *supports_tls = 0; +} diff --git a/arch/um/os-Linux/sys-x86_64/Makefile b/arch/um/os-Linux/sys-x86_64/Makefile new file mode 100644 index 0000000..a44a47f --- /dev/null +++ b/arch/um/os-Linux/sys-x86_64/Makefile @@ -0,0 +1,10 @@ +# +# Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) +# Licensed under the GPL +# + +obj-y = registers.o prctl.o signal.o task_size.o + +USER_OBJS := $(obj-y) + +include arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/sys-x86_64/prctl.c 
b/arch/um/os-Linux/sys-x86_64/prctl.c new file mode 100644 index 0000000..9d34edd --- /dev/null +++ b/arch/um/os-Linux/sys-x86_64/prctl.c @@ -0,0 +1,12 @@ +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit.com,linux.intel.com}) + * Licensed under the GPL + */ + +#include <sys/ptrace.h> +#include <linux/ptrace.h> + +int os_arch_prctl(int pid, int code, unsigned long *addr) +{ + return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) addr, code); +} diff --git a/arch/um/os-Linux/sys-x86_64/registers.c b/arch/um/os-Linux/sys-x86_64/registers.c new file mode 100644 index 0000000..594d97a --- /dev/null +++ b/arch/um/os-Linux/sys-x86_64/registers.c @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2006 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <errno.h> +#include <sys/ptrace.h> +#define __FRAME_OFFSETS +#include <asm/ptrace.h> +#include "kern_constants.h" +#include "longjmp.h" +#include "user.h" + +int save_fp_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +int restore_fp_registers(int pid, unsigned long *fp_regs) +{ + if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0) + return -errno; + return 0; +} + +unsigned long get_thread_reg(int reg, jmp_buf *buf) +{ + switch (reg) { + case RIP: + return buf[0]->__rip; + case RSP: + return buf[0]->__rsp; + case RBP: + return buf[0]->__rbp; + default: + printk(UM_KERN_ERR "get_thread_regs - unknown register %d\n", + reg); + return 0; + } +} + +int get_fp_registers(int pid, unsigned long *regs) +{ + return save_fp_registers(pid, regs); +} + +int put_fp_registers(int pid, unsigned long *regs) +{ + return restore_fp_registers(pid, regs); +} diff --git a/arch/um/os-Linux/sys-x86_64/signal.c b/arch/um/os-Linux/sys-x86_64/signal.c new file mode 100644 index 0000000..82a3888 --- /dev/null +++ b/arch/um/os-Linux/sys-x86_64/signal.c @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2006 Jeff Dike 
(jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <signal.h> + +extern void handle_signal(int sig, struct sigcontext *sc); + +void hard_handler(int sig) +{ + struct ucontext *uc; + asm("movq %%rdx, %0" : "=r" (uc)); + + handle_signal(sig, (struct sigcontext *) &uc->uc_mcontext); +} diff --git a/arch/um/os-Linux/sys-x86_64/task_size.c b/arch/um/os-Linux/sys-x86_64/task_size.c new file mode 100644 index 0000000..26a0dd1 --- /dev/null +++ b/arch/um/os-Linux/sys-x86_64/task_size.c @@ -0,0 +1,5 @@ +unsigned long os_get_top_address(unsigned long shift) +{ + /* The old value of CONFIG_TOP_ADDR */ + return 0x7fc0000000; +} diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c new file mode 100644 index 0000000..dec5678 --- /dev/null +++ b/arch/um/os-Linux/time.c @@ -0,0 +1,188 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stddef.h> +#include <errno.h> +#include <signal.h> +#include <time.h> +#include <sys/time.h> +#include "kern_constants.h" +#include "kern_util.h" +#include "os.h" +#include "process.h" +#include "user.h" + +int set_interval(void) +{ + int usec = UM_USEC_PER_SEC / UM_HZ; + struct itimerval interval = ((struct itimerval) { { 0, usec }, + { 0, usec } }); + + if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) + return -errno; + + return 0; +} + +int timer_one_shot(int ticks) +{ + unsigned long usec = ticks * UM_USEC_PER_SEC / UM_HZ; + unsigned long sec = usec / UM_USEC_PER_SEC; + struct itimerval interval; + + usec %= UM_USEC_PER_SEC; + interval = ((struct itimerval) { { 0, 0 }, { sec, usec } }); + + if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) + return -errno; + + return 0; +} + +/** + * timeval_to_ns - Convert timeval to nanoseconds + * @ts: pointer to the timeval variable to be converted + * + * Returns the scalar nanosecond representation of the timeval + * parameter. 
+ * + * Ripped from linux/time.h because it's a kernel header, and thus + * unusable from here. + */ +static inline long long timeval_to_ns(const struct timeval *tv) +{ + return ((long long) tv->tv_sec * UM_NSEC_PER_SEC) + + tv->tv_usec * UM_NSEC_PER_USEC; +} + +long long disable_timer(void) +{ + struct itimerval time = ((struct itimerval) { { 0, 0 }, { 0, 0 } }); + int remain, max = UM_NSEC_PER_SEC / UM_HZ; + + if (setitimer(ITIMER_VIRTUAL, &time, &time) < 0) + printk(UM_KERN_ERR "disable_timer - setitimer failed, " + "errno = %d\n", errno); + + remain = timeval_to_ns(&time.it_value); + if (remain > max) + remain = max; + + return remain; +} + +long long os_nsecs(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return timeval_to_ns(&tv); +} + +#ifdef UML_CONFIG_NO_HZ +static int after_sleep_interval(struct timespec *ts) +{ + return 0; +} + +static void deliver_alarm(void) +{ + alarm_handler(SIGVTALRM, NULL); +} + +static unsigned long long sleep_time(unsigned long long nsecs) +{ + return nsecs; +} + +#else +unsigned long long last_tick; +unsigned long long skew; + +static void deliver_alarm(void) +{ + unsigned long long this_tick = os_nsecs(); + int one_tick = UM_NSEC_PER_SEC / UM_HZ; + + /* Protection against the host's time going backwards */ + if ((last_tick != 0) && (this_tick < last_tick)) + this_tick = last_tick; + + if (last_tick == 0) + last_tick = this_tick - one_tick; + + skew += this_tick - last_tick; + + while (skew >= one_tick) { + alarm_handler(SIGVTALRM, NULL); + skew -= one_tick; + } + + last_tick = this_tick; +} + +static unsigned long long sleep_time(unsigned long long nsecs) +{ + return nsecs > skew ? 
nsecs - skew : 0; +} + +static inline long long timespec_to_us(const struct timespec *ts) +{ + return ((long long) ts->tv_sec * UM_USEC_PER_SEC) + + ts->tv_nsec / UM_NSEC_PER_USEC; +} + +static int after_sleep_interval(struct timespec *ts) +{ + int usec = UM_USEC_PER_SEC / UM_HZ; + long long start_usecs = timespec_to_us(ts); + struct timeval tv; + struct itimerval interval; + + /* + * It seems that rounding can increase the value returned from + * setitimer to larger than the one passed in. Over time, + * this will cause the remaining time to be greater than the + * tick interval. If this happens, then just reduce the first + * tick to the interval value. + */ + if (start_usecs > usec) + start_usecs = usec; + + start_usecs -= skew / UM_NSEC_PER_USEC; + if (start_usecs < 0) + start_usecs = 0; + + tv = ((struct timeval) { .tv_sec = start_usecs / UM_USEC_PER_SEC, + .tv_usec = start_usecs % UM_USEC_PER_SEC }); + interval = ((struct itimerval) { { 0, usec }, tv }); + + if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) + return -errno; + + return 0; +} +#endif + +void idle_sleep(unsigned long long nsecs) +{ + struct timespec ts; + + /* + * nsecs can come in as zero, in which case, this starts a + * busy loop. To prevent this, reset nsecs to the tick + * interval if it is zero. 
+ */ + if (nsecs == 0) + nsecs = UM_NSEC_PER_SEC / UM_HZ; + + nsecs = sleep_time(nsecs); + ts = ((struct timespec) { .tv_sec = nsecs / UM_NSEC_PER_SEC, + .tv_nsec = nsecs % UM_NSEC_PER_SEC }); + + if (nanosleep(&ts, &ts) == 0) + deliver_alarm(); + after_sleep_interval(&ts); +} diff --git a/arch/um/os-Linux/tls.c b/arch/um/os-Linux/tls.c new file mode 100644 index 0000000..7327780 --- /dev/null +++ b/arch/um/os-Linux/tls.c @@ -0,0 +1,35 @@ +#include <errno.h> +#include <sys/ptrace.h> +#include "sysdep/tls.h" + +/* TLS support - we basically rely on the host's one.*/ + +#ifndef PTRACE_GET_THREAD_AREA +#define PTRACE_GET_THREAD_AREA 25 +#endif + +#ifndef PTRACE_SET_THREAD_AREA +#define PTRACE_SET_THREAD_AREA 26 +#endif + +int os_set_thread_area(user_desc_t *info, int pid) +{ + int ret; + + ret = ptrace(PTRACE_SET_THREAD_AREA, pid, info->entry_number, + (unsigned long) info); + if (ret < 0) + ret = -errno; + return ret; +} + +int os_get_thread_area(user_desc_t *info, int pid) +{ + int ret; + + ret = ptrace(PTRACE_GET_THREAD_AREA, pid, info->entry_number, + (unsigned long) info); + if (ret < 0) + ret = -errno; + return ret; +} diff --git a/arch/um/os-Linux/tty.c b/arch/um/os-Linux/tty.c new file mode 100644 index 0000000..b09ff66 --- /dev/null +++ b/arch/um/os-Linux/tty.c @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include "kern_constants.h" +#include "kern_util.h" +#include "os.h" +#include "user.h" + +struct grantpt_info { + int fd; + int res; + int err; +}; + +static void grantpt_cb(void *arg) +{ + struct grantpt_info *info = arg; + + info->res = grantpt(info->fd); + info->err = errno; +} + +int get_pty(void) +{ + struct grantpt_info info; + int fd, err; + + fd = open("/dev/ptmx", O_RDWR); + if (fd < 0) { + err = -errno; + printk(UM_KERN_ERR "get_pty : Couldn't open /dev/ptmx - " + "err = %d\n", 
errno); + return err; + } + + info.fd = fd; + initial_thread_cb(grantpt_cb, &info); + + if (info.res < 0) { + err = -info.err; + printk(UM_KERN_ERR "get_pty : Couldn't grant pty - " + "errno = %d\n", -info.err); + goto out; + } + + if (unlockpt(fd) < 0) { + err = -errno; + printk(UM_KERN_ERR "get_pty : Couldn't unlock pty - " + "errno = %d\n", errno); + goto out; + } + return fd; +out: + close(fd); + return err; +} diff --git a/arch/um/os-Linux/uaccess.c b/arch/um/os-Linux/uaccess.c new file mode 100644 index 0000000..087ed74 --- /dev/null +++ b/arch/um/os-Linux/uaccess.c @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk) + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stddef.h> +#include "longjmp.h" + +unsigned long __do_user_copy(void *to, const void *from, int n, + void **fault_addr, jmp_buf **fault_catcher, + void (*op)(void *to, const void *from, + int n), int *faulted_out) +{ + unsigned long *faddrp = (unsigned long *) fault_addr, ret; + + jmp_buf jbuf; + *fault_catcher = &jbuf; + if (UML_SETJMP(&jbuf) == 0) { + (*op)(to, from, n); + ret = 0; + *faulted_out = 0; + } + else { + ret = *faddrp; + *faulted_out = 1; + } + *fault_addr = NULL; + *fault_catcher = NULL; + return ret; +} + diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c new file mode 100644 index 0000000..a27defb --- /dev/null +++ b/arch/um/os-Linux/umid.c @@ -0,0 +1,396 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <stdlib.h> +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <string.h> +#include <unistd.h> +#include <sys/stat.h> +#include "init.h" +#include "kern_constants.h" +#include "os.h" +#include "user.h" + +#define UML_DIR "~/.uml/" + +#define UMID_LEN 64 + +/* Changed by set_umid, which is run early in boot */ +static char 
umid[UMID_LEN] = { 0 }; + +/* Changed by set_uml_dir and make_uml_dir, which are run early in boot */ +static char *uml_dir = UML_DIR; + +static int __init make_uml_dir(void) +{ + char dir[512] = { '\0' }; + int len, err; + + if (*uml_dir == '~') { + char *home = getenv("HOME"); + + err = -ENOENT; + if (home == NULL) { + printk(UM_KERN_ERR "make_uml_dir : no value in " + "environment for $HOME\n"); + goto err; + } + strlcpy(dir, home, sizeof(dir)); + uml_dir++; + } + strlcat(dir, uml_dir, sizeof(dir)); + len = strlen(dir); + if (len > 0 && dir[len - 1] != '/') + strlcat(dir, "/", sizeof(dir)); + + err = -ENOMEM; + uml_dir = malloc(strlen(dir) + 1); + if (uml_dir == NULL) { + printf("make_uml_dir : malloc failed, errno = %d\n", errno); + goto err; + } + strcpy(uml_dir, dir); + + if ((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)) { + printf("Failed to mkdir '%s': %s\n", uml_dir, strerror(errno)); + err = -errno; + goto err_free; + } + return 0; + +err_free: + free(uml_dir); +err: + uml_dir = NULL; + return err; +} + +/* + * Unlinks the files contained in @dir and then removes @dir. + * Doesn't handle directory trees, so it's not like rm -rf, but almost such. We + * ignore ENOENT errors for anything (they happen, strangely enough - possibly + * due to races between multiple dying UML threads). 
+ */ +static int remove_files_and_dir(char *dir) +{ + DIR *directory; + struct dirent *ent; + int len; + char file[256]; + int ret; + + directory = opendir(dir); + if (directory == NULL) { + if (errno != ENOENT) + return -errno; + else + return 0; + } + + while ((ent = readdir(directory)) != NULL) { + if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) + continue; + len = strlen(dir) + sizeof("/") + strlen(ent->d_name) + 1; + if (len > sizeof(file)) { + ret = -E2BIG; + goto out; + } + + sprintf(file, "%s/%s", dir, ent->d_name); + if (unlink(file) < 0 && errno != ENOENT) { + ret = -errno; + goto out; + } + } + + if (rmdir(dir) < 0 && errno != ENOENT) { + ret = -errno; + goto out; + } + + ret = 0; +out: + closedir(directory); + return ret; +} + +/* + * This says that there isn't already a user of the specified directory even if + * there are errors during the checking. This is because if these errors + * happen, the directory is unusable by the pre-existing UML, so we might as + * well take it over. This could happen either by + * the existing UML somehow corrupting its umid directory + * something other than UML sticking stuff in the directory + * this boot racing with a shutdown of the other UML + * In any of these cases, the directory isn't useful for anything else. + * + * Boolean return: 1 if in use, 0 otherwise. 
+ */ +static inline int is_umdir_used(char *dir) +{ + char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; + char pid[sizeof("nnnnn\0")], *end; + int dead, fd, p, n, err; + + n = snprintf(file, sizeof(file), "%s/pid", dir); + if (n >= sizeof(file)) { + printk(UM_KERN_ERR "is_umdir_used - pid filename too long\n"); + err = -E2BIG; + goto out; + } + + dead = 0; + fd = open(file, O_RDONLY); + if (fd < 0) { + fd = -errno; + if (fd != -ENOENT) { + printk(UM_KERN_ERR "is_umdir_used : couldn't open pid " + "file '%s', err = %d\n", file, -fd); + } + goto out; + } + + err = 0; + /* Leave room for the terminator: read() does not NUL-terminate */ + n = read(fd, pid, sizeof(pid) - 1); + if (n < 0) { + printk(UM_KERN_ERR "is_umdir_used : couldn't read pid file " + "'%s', err = %d\n", file, errno); + goto out_close; + } else if (n == 0) { + printk(UM_KERN_ERR "is_umdir_used : couldn't read pid file " + "'%s', 0-byte read\n", file); + goto out_close; + } + + /* strtoul() needs a terminated string */ + pid[n] = '\0'; + p = strtoul(pid, &end, 0); + if (end == pid) { + printk(UM_KERN_ERR "is_umdir_used : couldn't parse pid file " + "'%s', errno = %d\n", file, errno); + goto out_close; + } + + if ((kill(p, 0) == 0) || (errno != ESRCH)) { + printk(UM_KERN_ERR "umid \"%s\" is already in use by pid %d\n", + umid, p); + /* Close the pid file on the in-use path too */ + close(fd); + return 1; + } + +out_close: + close(fd); +out: + return 0; +} + +/* + * Try to remove the directory @dir unless it's in use. + * Precondition: @dir exists. + * Returns 0 for success, < 0 for failure in removal or if the directory is in + * use.
+ */ +static int umdir_take_if_dead(char *dir) +{ + int ret; + if (is_umdir_used(dir)) + return -EEXIST; + + ret = remove_files_and_dir(dir); + if (ret) { + printk(UM_KERN_ERR "is_umdir_used - remove_files_and_dir " + "failed with err = %d\n", ret); + } + return ret; +} + +static void __init create_pid_file(void) +{ + char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; + char pid[sizeof("nnnnn\0")]; + int fd, n; + + if (umid_file_name("pid", file, sizeof(file))) + return; + + fd = open(file, O_RDWR | O_CREAT | O_EXCL, 0644); + if (fd < 0) { + printk(UM_KERN_ERR "Open of machine pid file \"%s\" failed: " + "%s\n", file, strerror(errno)); + return; + } + + snprintf(pid, sizeof(pid), "%d\n", getpid()); + n = write(fd, pid, strlen(pid)); + if (n != strlen(pid)) + printk(UM_KERN_ERR "Write of pid file failed - err = %d\n", + errno); + + close(fd); +} + +int __init set_umid(char *name) +{ + if (strlen(name) > UMID_LEN - 1) + return -E2BIG; + + strlcpy(umid, name, sizeof(umid)); + + return 0; +} + +/* Changed in make_umid, which is called during early boot */ +static int umid_setup = 0; + +static int __init make_umid(void) +{ + int fd, err; + char tmp[256]; + + if (umid_setup) + return 0; + + make_uml_dir(); + + if (*umid == '\0') { + strlcpy(tmp, uml_dir, sizeof(tmp)); + strlcat(tmp, "XXXXXX", sizeof(tmp)); + fd = mkstemp(tmp); + if (fd < 0) { + printk(UM_KERN_ERR "make_umid - mkstemp(%s) failed: " + "%s\n", tmp, strerror(errno)); + err = -errno; + goto err; + } + + close(fd); + + set_umid(&tmp[strlen(uml_dir)]); + + /* + * There's a nice tiny little race between this unlink and + * the mkdir below. It'd be nice if there were a mkstemp + * for directories. 
+ */ + if (unlink(tmp)) { + err = -errno; + goto err; + } + } + + snprintf(tmp, sizeof(tmp), "%s%s", uml_dir, umid); + err = mkdir(tmp, 0777); + if (err < 0) { + err = -errno; + if (err != -EEXIST) + goto err; + + if (umdir_take_if_dead(tmp) < 0) + goto err; + + err = mkdir(tmp, 0777); + } + if (err) { + err = -errno; + printk(UM_KERN_ERR "Failed to create '%s' - err = %d\n", umid, + errno); + goto err; + } + + umid_setup = 1; + + create_pid_file(); + + err = 0; + err: + return err; +} + +static int __init make_umid_init(void) +{ + if (!make_umid()) + return 0; + + /* + * If initializing with the given umid failed, then try again with + * a random one. + */ + printk(UM_KERN_ERR "Failed to initialize umid \"%s\", trying with a " + "random umid\n", umid); + *umid = '\0'; + make_umid(); + + return 0; +} + +__initcall(make_umid_init); + +int __init umid_file_name(char *name, char *buf, int len) +{ + int n, err; + + err = make_umid(); + if (err) + return err; + + n = snprintf(buf, len, "%s%s/%s", uml_dir, umid, name); + if (n >= len) { + printk(UM_KERN_ERR "umid_file_name : buffer too short\n"); + return -E2BIG; + } + + return 0; +} + +char *get_umid(void) +{ + return umid; +} + +static int __init set_uml_dir(char *name, int *add) +{ + if (*name == '\0') { + printf("uml_dir can't be an empty string\n"); + return 0; + } + + if (name[strlen(name) - 1] == '/') { + uml_dir = name; + return 0; + } + + uml_dir = malloc(strlen(name) + 2); + if (uml_dir == NULL) { + printf("Failed to malloc uml_dir - error = %d\n", errno); + + /* + * Return 0 here because do_initcalls doesn't look at + * the return value. 
+ */ + return 0; + } + sprintf(uml_dir, "%s/", name); + + return 0; +} + +__uml_setup("uml_dir=", set_uml_dir, +"uml_dir=<directory>\n" +" The location to place the pid and umid files.\n\n" +); + +/* Exit-time cleanup: remove this instance's umid directory and its files. */ +static void remove_umid_dir(void) +{ + char dir[strlen(uml_dir) + UMID_LEN + 1]; + /* int, not char: holds the int result of remove_files_and_dir() */ + int err; + + sprintf(dir, "%s%s", uml_dir, umid); + err = remove_files_and_dir(dir); + if (err) + printf("remove_umid_dir - remove_files_and_dir failed with " + "err = %d\n", err); +} + +__uml_exitcall(remove_umid_dir); diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c new file mode 100644 index 0000000..74f49bb --- /dev/null +++ b/arch/um/os-Linux/user_syms.c @@ -0,0 +1,108 @@ +#include "linux/types.h" +#include "linux/module.h" + +/* Some of this are builtin function (some are not but could in the future), + * so I *must* declare good prototypes for them and then EXPORT them. + * The kernel code uses the macro defined by include/linux/string.h, + * so I undef macros; the userspace code does not include that and I + * add an EXPORT for the glibc one. + */ + +#undef strlen +#undef strstr +#undef memcpy +#undef memset + +extern size_t strlen(const char *); +extern void *memcpy(void *, const void *, size_t); +extern void *memmove(void *, const void *, size_t); +extern void *memset(void *, int, size_t); +extern int printf(const char *, ...); + +/* If it's not defined, the export is included in lib/string.c.*/ +#ifdef __HAVE_ARCH_STRSTR +EXPORT_SYMBOL(strstr); +#endif + +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(printf); + +/* Here, instead, I can provide a fake prototype. Yes, someone cares: genksyms.
+ * However, the modules will use the CRC defined *here*, no matter if it is + * good; so the versions of these symbols will always match + */ +#define EXPORT_SYMBOL_PROTO(sym) \ + int sym(void); \ + EXPORT_SYMBOL(sym); + +extern void readdir64(void) __attribute__((weak)); +EXPORT_SYMBOL(readdir64); +extern void truncate64(void) __attribute__((weak)); +EXPORT_SYMBOL(truncate64); + +#ifdef SUBARCH_i386 +EXPORT_SYMBOL(vsyscall_ehdr); +EXPORT_SYMBOL(vsyscall_end); +#endif + +EXPORT_SYMBOL_PROTO(__errno_location); + +EXPORT_SYMBOL_PROTO(access); +EXPORT_SYMBOL_PROTO(open); +EXPORT_SYMBOL_PROTO(open64); +EXPORT_SYMBOL_PROTO(close); +EXPORT_SYMBOL_PROTO(read); +EXPORT_SYMBOL_PROTO(write); +EXPORT_SYMBOL_PROTO(dup2); +EXPORT_SYMBOL_PROTO(__xstat); +EXPORT_SYMBOL_PROTO(__lxstat); +EXPORT_SYMBOL_PROTO(__lxstat64); +EXPORT_SYMBOL_PROTO(__fxstat64); +EXPORT_SYMBOL_PROTO(lseek); +EXPORT_SYMBOL_PROTO(lseek64); +EXPORT_SYMBOL_PROTO(chown); +EXPORT_SYMBOL_PROTO(fchown); +EXPORT_SYMBOL_PROTO(truncate); +EXPORT_SYMBOL_PROTO(ftruncate64); +EXPORT_SYMBOL_PROTO(utime); +EXPORT_SYMBOL_PROTO(utimes); +EXPORT_SYMBOL_PROTO(futimes); +EXPORT_SYMBOL_PROTO(chmod); +EXPORT_SYMBOL_PROTO(fchmod); +EXPORT_SYMBOL_PROTO(rename); +EXPORT_SYMBOL_PROTO(__xmknod); + +EXPORT_SYMBOL_PROTO(symlink); +EXPORT_SYMBOL_PROTO(link); +EXPORT_SYMBOL_PROTO(unlink); +EXPORT_SYMBOL_PROTO(readlink); + +EXPORT_SYMBOL_PROTO(mkdir); +EXPORT_SYMBOL_PROTO(rmdir); +EXPORT_SYMBOL_PROTO(opendir); +EXPORT_SYMBOL_PROTO(readdir); +EXPORT_SYMBOL_PROTO(closedir); +EXPORT_SYMBOL_PROTO(seekdir); +EXPORT_SYMBOL_PROTO(telldir); + +EXPORT_SYMBOL_PROTO(ioctl); + +EXPORT_SYMBOL_PROTO(pread64); +EXPORT_SYMBOL_PROTO(pwrite64); + +EXPORT_SYMBOL_PROTO(statfs); +EXPORT_SYMBOL_PROTO(statfs64); + +EXPORT_SYMBOL_PROTO(getuid); + +EXPORT_SYMBOL_PROTO(fsync); +EXPORT_SYMBOL_PROTO(fdatasync); + +/* Export symbols used by GCC for the stack protector. 
*/ +extern void __stack_smash_handler(void *) __attribute__((weak)); +EXPORT_SYMBOL(__stack_smash_handler); + +extern long __guard __attribute__((weak)); +EXPORT_SYMBOL(__guard); diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c new file mode 100644 index 0000000..6ea7797 --- /dev/null +++ b/arch/um/os-Linux/util.c @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <signal.h> +#include <string.h> +#include <termios.h> +#include <wait.h> +#include <sys/mman.h> +#include <sys/utsname.h> +#include "kern_constants.h" +#include "os.h" +#include "user.h" + +void stack_protections(unsigned long address) +{ + if (mprotect((void *) address, UM_THREAD_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC) < 0) + panic("protecting stack failed, errno = %d", errno); +} + +int raw(int fd) +{ + struct termios tt; + int err; + + CATCH_EINTR(err = tcgetattr(fd, &tt)); + if (err < 0) + return -errno; + + cfmakeraw(&tt); + + CATCH_EINTR(err = tcsetattr(fd, TCSADRAIN, &tt)); + if (err < 0) + return -errno; + + /* + * XXX tcsetattr could have applied only some changes + * (and cfmakeraw() is a set of changes) + */ + return 0; +} + +void setup_machinename(char *machine_out) +{ + struct utsname host; + + uname(&host); +#ifdef UML_CONFIG_UML_X86 +# ifndef UML_CONFIG_64BIT + if (!strcmp(host.machine, "x86_64")) { + strcpy(machine_out, "i686"); + return; + } +# else + if (!strcmp(host.machine, "i686")) { + strcpy(machine_out, "x86_64"); + return; + } +# endif +#endif + strcpy(machine_out, host.machine); +} + +void setup_hostinfo(char *buf, int len) +{ + struct utsname host; + + uname(&host); + snprintf(buf, len, "%s %s %s %s %s", host.sysname, host.nodename, + host.release, host.version, host.machine); +} + +void os_dump_core(void) +{ + int pid; + + signal(SIGSEGV, SIG_DFL); + + /* + * We are about to SIGTERM this entire process group to 
ensure that + * nothing is around to run after the kernel exits. The + * kernel wants to abort, not die through SIGTERM, so we + * ignore it here. + */ + + signal(SIGTERM, SIG_IGN); + kill(0, SIGTERM); + /* + * Most of the other processes associated with this UML are + * likely stopped, so give them a SIGCONT so they see the + * SIGTERM. + */ + kill(0, SIGCONT); + + /* + * Now, having sent signals to everyone but us, make sure they + * die by ptrace. Processes can survive what's been done to + * them so far - the mechanism I understand is receiving a + * SIGSEGV and segfaulting immediately upon return. There is + * always a SIGSEGV pending, and (I'm guessing) signals are + * processed in numeric order so the SIGTERM (signal 15 vs + * SIGSEGV being signal 11) is never handled. + * + * Run a waitpid loop until we get some kind of error. + * Hopefully, it's ECHILD, but there's not a lot we can do if + * it's something else. Tell os_kill_ptraced_process not to + * wait for the child to report its death because there's + * nothing reasonable to do if that fails.
+ */ + + while ((pid = waitpid(-1, NULL, WNOHANG | __WALL)) > 0) + os_kill_ptraced_process(pid, 0); + + abort(); +} diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules new file mode 100644 index 0000000..61107b6 --- /dev/null +++ b/arch/um/scripts/Makefile.rules @@ -0,0 +1,32 @@ +# =========================================================================== +# arch/um: Generic definitions +# =========================================================================== + +USER_SINGLE_OBJS := \ + $(foreach f,$(patsubst %.o,%,$(obj-y) $(obj-m)),$($(f)-objs)) +USER_OBJS += $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) +USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) + +$(USER_OBJS:.o=.%): \ + c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) $(CFLAGS_$(basetarget).o) +$(USER_OBJS) : CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ \ + -Dunix -D__unix__ -D__$(SUBARCH)__ $(CF) + +# These are like USER_OBJS but filter USER_CFLAGS through unprofile instead of +# using it directly. 
+UNPROFILE_OBJS := $(foreach file,$(UNPROFILE_OBJS),$(obj)/$(file)) + +$(UNPROFILE_OBJS:.o=.%): \ + c_flags = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS)) $(CFLAGS_$(basetarget).o) +$(UNPROFILE_OBJS) : CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ \ + -Dunix -D__unix__ -D__$(SUBARCH)__ $(CF) + +# The stubs can't try to call mcount or update basic block data +define unprofile + $(patsubst -pg,,$(patsubst -fprofile-arcs -ftest-coverage,,$(1))) +endef + +ifdef subarch-obj-y +obj-y += subarch.o +subarch-y = $(addprefix ../../$(HEADER_ARCH)/,$(subarch-obj-y)) +endif diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile new file mode 100644 index 0000000..598b5c1 --- /dev/null +++ b/arch/um/sys-i386/Makefile @@ -0,0 +1,21 @@ +# +# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) +# + +obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \ + ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \ + sys_call_table.o tls.o + +subarch-obj-y = lib/semaphore_32.o lib/string_32.o +subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o +subarch-obj-$(CONFIG_MODULES) += kernel/module_32.o + +USER_OBJS := bugs.o ptrace_user.o fault.o + +USER_OBJS += user-offsets.s +extra-y += user-offsets.s + +UNPROFILE_OBJS := stub_segv.o +CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING) + +include arch/um/scripts/Makefile.rules diff --git a/arch/um/sys-i386/asm/archparam.h b/arch/um/sys-i386/asm/archparam.h new file mode 100644 index 0000000..93fd723 --- /dev/null +++ b/arch/um/sys-i386/asm/archparam.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __UM_ARCHPARAM_I386_H +#define __UM_ARCHPARAM_I386_H + +#ifdef CONFIG_X86_PAE +#define LAST_PKMAP 512 +#else +#define LAST_PKMAP 1024 +#endif + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+ * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/sys-i386/asm/elf.h b/arch/um/sys-i386/asm/elf.h new file mode 100644 index 0000000..d0da9d7 --- /dev/null +++ b/arch/um/sys-i386/asm/elf.h @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ +#ifndef __UM_ELF_I386_H +#define __UM_ELF_I386_H + +#include <asm/user.h> +#include "skas.h" + +#define R_386_NONE 0 +#define R_386_32 1 +#define R_386_PC32 2 +#define R_386_GOT32 3 +#define R_386_PLT32 4 +#define R_386_COPY 5 +#define R_386_GLOB_DAT 6 +#define R_386_JMP_SLOT 7 +#define R_386_RELATIVE 8 +#define R_386_GOTOFF 9 +#define R_386_GOTPC 10 +#define R_386_NUM 11 + +typedef unsigned long elf_greg_t; + +#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t)) +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef struct user_i387_struct elf_fpregset_t; + +/* + * This is used to ensure we don't load something for the wrong architecture. 
+ */ +#define elf_check_arch(x) \ + (((x)->e_machine == EM_386) || ((x)->e_machine == EM_486)) + +#define ELF_CLASS ELFCLASS32 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_386 + +#define ELF_PLAT_INIT(regs, load_addr) do { \ + PT_REGS_EBX(regs) = 0; \ + PT_REGS_ECX(regs) = 0; \ + PT_REGS_EDX(regs) = 0; \ + PT_REGS_ESI(regs) = 0; \ + PT_REGS_EDI(regs) = 0; \ + PT_REGS_EBP(regs) = 0; \ + PT_REGS_EAX(regs) = 0; \ +} while (0) + +#define USE_ELF_CORE_DUMP +#define ELF_EXEC_PAGESIZE 4096 + +#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) + +/* Shamelessly stolen from include/asm-i386/elf.h */ + +#define ELF_CORE_COPY_REGS(pr_reg, regs) do { \ + pr_reg[0] = PT_REGS_EBX(regs); \ + pr_reg[1] = PT_REGS_ECX(regs); \ + pr_reg[2] = PT_REGS_EDX(regs); \ + pr_reg[3] = PT_REGS_ESI(regs); \ + pr_reg[4] = PT_REGS_EDI(regs); \ + pr_reg[5] = PT_REGS_EBP(regs); \ + pr_reg[6] = PT_REGS_EAX(regs); \ + pr_reg[7] = PT_REGS_DS(regs); \ + pr_reg[8] = PT_REGS_ES(regs); \ + /* fake once used fs and gs selectors? */ \ + pr_reg[9] = PT_REGS_DS(regs); \ + pr_reg[10] = PT_REGS_DS(regs); \ + pr_reg[11] = PT_REGS_SYSCALL_NR(regs); \ + pr_reg[12] = PT_REGS_IP(regs); \ + pr_reg[13] = PT_REGS_CS(regs); \ + pr_reg[14] = PT_REGS_EFLAGS(regs); \ + pr_reg[15] = PT_REGS_SP(regs); \ + pr_reg[16] = PT_REGS_SS(regs); \ +} while (0); + +extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu); + +#define ELF_CORE_COPY_FPREGS(t, fpu) elf_core_copy_fpregs(t, fpu) + +extern long elf_aux_hwcap; +#define ELF_HWCAP (elf_aux_hwcap) + +extern char * elf_aux_platform; +#define ELF_PLATFORM (elf_aux_platform) + +#define SET_PERSONALITY(ex) do { } while (0) + +extern unsigned long vsyscall_ehdr; +extern unsigned long vsyscall_end; +extern unsigned long __kernel_vsyscall; + +#define VSYSCALL_BASE vsyscall_ehdr +#define VSYSCALL_END vsyscall_end + +/* + * This is the range that is readable by user mode, and things + * acting like user mode such as get_user_pages. 
+ */ +#define FIXADDR_USER_START VSYSCALL_BASE +#define FIXADDR_USER_END VSYSCALL_END + +/* + * Architecture-neutral AT_ values in 0-17, leave some room + * for more of them, start the x86-specific ones at 32. + */ +#define AT_SYSINFO 32 +#define AT_SYSINFO_EHDR 33 + +#define ARCH_DLINFO \ +do { \ + if ( vsyscall_ehdr ) { \ + NEW_AUX_ENT(AT_SYSINFO, __kernel_vsyscall); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, vsyscall_ehdr); \ + } \ +} while (0) + +/* + * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out + * extra segments containing the vsyscall DSO contents. Dumping its + * contents makes post-mortem fully interpretable later without matching up + * the same kernel and hardware config to see what PC values meant. + * Dumping its extra ELF program headers includes all the other information + * a debugger needs to easily find how the vsyscall DSO was being used. + */ +#define ELF_CORE_EXTRA_PHDRS \ + (vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0 ) + +#define ELF_CORE_WRITE_EXTRA_PHDRS \ +if ( vsyscall_ehdr ) { \ + const struct elfhdr *const ehdrp = (struct elfhdr *)vsyscall_ehdr; \ + const struct elf_phdr *const phdrp = \ + (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); \ + int i; \ + Elf32_Off ofs = 0; \ + for (i = 0; i < ehdrp->e_phnum; ++i) { \ + struct elf_phdr phdr = phdrp[i]; \ + if (phdr.p_type == PT_LOAD) { \ + ofs = phdr.p_offset = offset; \ + offset += phdr.p_filesz; \ + } \ + else \ + phdr.p_offset += ofs; \ + phdr.p_paddr = 0; /* match other core phdrs */ \ + DUMP_WRITE(&phdr, sizeof(phdr)); \ + } \ +} +#define ELF_CORE_WRITE_EXTRA_DATA \ +if ( vsyscall_ehdr ) { \ + const struct elfhdr *const ehdrp = (struct elfhdr *)vsyscall_ehdr; \ + const struct elf_phdr *const phdrp = \ + (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); \ + int i; \ + for (i = 0; i < ehdrp->e_phnum; ++i) { \ + if (phdrp[i].p_type == PT_LOAD) \ + DUMP_WRITE((void *) phdrp[i].p_vaddr, \ + phdrp[i].p_filesz); \ + } \ +} + +#endif 
diff --git a/arch/um/sys-i386/asm/module.h b/arch/um/sys-i386/asm/module.h new file mode 100644 index 0000000..5ead4a0 --- /dev/null +++ b/arch/um/sys-i386/asm/module.h @@ -0,0 +1,13 @@ +#ifndef __UM_MODULE_I386_H +#define __UM_MODULE_I386_H + +/* UML is simple */ +struct mod_arch_specific +{ +}; + +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define Elf_Ehdr Elf32_Ehdr + +#endif diff --git a/arch/um/sys-i386/asm/processor.h b/arch/um/sys-i386/asm/processor.h new file mode 100644 index 0000000..82a9061 --- /dev/null +++ b/arch/um/sys-i386/asm/processor.h @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __UM_PROCESSOR_I386_H +#define __UM_PROCESSOR_I386_H + +#include "linux/string.h" +#include <sysdep/host_ldt.h> +#include "asm/segment.h" + +extern int host_has_cmov; + +/* include faultinfo structure */ +#include "sysdep/faultinfo.h" + +struct uml_tls_struct { + struct user_desc tls; + unsigned flushed:1; + unsigned present:1; +}; + +struct arch_thread { + struct uml_tls_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; + unsigned long debugregs[8]; + int debugregs_seq; + struct faultinfo faultinfo; +}; + +#define INIT_ARCH_THREAD { \ + .tls_array = { [ 0 ... GDT_ENTRY_TLS_ENTRIES - 1 ] = \ + { .present = 0, .flushed = 0 } }, \ + .debugregs = { [ 0 ... 7 ] = 0 }, \ + .debugregs_seq = 0, \ + .faultinfo = { 0, 0, 0 } \ +} + +static inline void arch_flush_thread(struct arch_thread *thread) +{ + /* Clear any TLS still hanging */ + memset(&thread->tls_array, 0, sizeof(thread->tls_array)); +} + +static inline void arch_copy_thread(struct arch_thread *from, + struct arch_thread *to) +{ + memcpy(&to->tls_array, &from->tls_array, sizeof(from->tls_array)); +} + +#include <asm/user.h> + +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. 
*/ +static inline void rep_nop(void) +{ + __asm__ __volatile__("rep;nop": : :"memory"); +} + +#define cpu_relax() rep_nop() + +/* + * Default implementation of macro that returns current + * instruction pointer ("program counter"). Stolen + * from asm-i386/processor.h + */ +#define current_text_addr() \ + ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; }) + +#define ARCH_IS_STACKGROW(address) \ + (address + 32 >= UPT_SP(¤t->thread.regs.regs)) + +#define KSTK_EIP(tsk) KSTK_REG(tsk, EIP) +#define KSTK_ESP(tsk) KSTK_REG(tsk, UESP) +#define KSTK_EBP(tsk) KSTK_REG(tsk, EBP) + +#include "asm/processor-generic.h" + +#endif diff --git a/arch/um/sys-i386/asm/ptrace.h b/arch/um/sys-i386/asm/ptrace.h new file mode 100644 index 0000000..0273e4d --- /dev/null +++ b/arch/um/sys-i386/asm/ptrace.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __UM_PTRACE_I386_H +#define __UM_PTRACE_I386_H + +#define HOST_AUDIT_ARCH AUDIT_ARCH_I386 + +#include "linux/compiler.h" +#include "asm/ptrace-generic.h" + +#define PT_REGS_EAX(r) UPT_EAX(&(r)->regs) +#define PT_REGS_EBX(r) UPT_EBX(&(r)->regs) +#define PT_REGS_ECX(r) UPT_ECX(&(r)->regs) +#define PT_REGS_EDX(r) UPT_EDX(&(r)->regs) +#define PT_REGS_ESI(r) UPT_ESI(&(r)->regs) +#define PT_REGS_EDI(r) UPT_EDI(&(r)->regs) +#define PT_REGS_EBP(r) UPT_EBP(&(r)->regs) + +#define PT_REGS_CS(r) UPT_CS(&(r)->regs) +#define PT_REGS_SS(r) UPT_SS(&(r)->regs) +#define PT_REGS_DS(r) UPT_DS(&(r)->regs) +#define PT_REGS_ES(r) UPT_ES(&(r)->regs) +#define PT_REGS_FS(r) UPT_FS(&(r)->regs) +#define PT_REGS_GS(r) UPT_GS(&(r)->regs) + +#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs) + +#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_EAX(r) +#define PT_REGS_SYSCALL_RET(r) PT_REGS_EAX(r) +#define PT_FIX_EXEC_STACK(sp) do ; while(0) + +#define profile_pc(regs) PT_REGS_IP(regs) + +#define user_mode(r) UPT_IS_USER(&(r)->regs) + +/* + * Forward declaration to avoid including 
sysdep/tls.h, which causes a + * circular include, and compilation failures. + */ +struct user_desc; + +extern int get_fpxregs(struct user_fxsr_struct __user *buf, + struct task_struct *child); +extern int set_fpxregs(struct user_fxsr_struct __user *buf, + struct task_struct *tsk); + +extern int ptrace_get_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc); + +extern int ptrace_set_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc); + +#endif diff --git a/arch/um/sys-i386/bug.c b/arch/um/sys-i386/bug.c new file mode 100644 index 0000000..8d4f273 --- /dev/null +++ b/arch/um/sys-i386/bug.c @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL V2 + */ + +#include <linux/uaccess.h> +#include <asm/errno.h> + +/* Mostly copied from i386/x86_86 - eliminated the eip < PAGE_OFFSET because + * that's not relevant in skas mode. + */ + +int is_valid_bugaddr(unsigned long eip) +{ + unsigned short ud2; + + if (probe_kernel_address((unsigned short __user *)eip, ud2)) + return 0; + + return ud2 == 0x0b0f; +} diff --git a/arch/um/sys-i386/bugs.c b/arch/um/sys-i386/bugs.c new file mode 100644 index 0000000..2c6d0d7 --- /dev/null +++ b/arch/um/sys-i386/bugs.c @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <signal.h> +#include "kern_constants.h" +#include "kern_util.h" +#include "longjmp.h" +#include "task.h" +#include "user.h" +#include "sysdep/ptrace.h" + +/* Set during early boot */ +static int host_has_cmov = 1; +static jmp_buf cmov_test_return; + +static void cmov_sigill_test_handler(int sig) +{ + host_has_cmov = 0; + longjmp(cmov_test_return, 1); +} + +void arch_check_bugs(void) +{ + struct sigaction old, new; + + printk(UM_KERN_INFO "Checking for host processor cmov support..."); + new.sa_handler = cmov_sigill_test_handler; + + /* Make sure that SIGILL is enabled after the 
handler longjmps back */ + new.sa_flags = SA_NODEFER; + sigemptyset(&new.sa_mask); + sigaction(SIGILL, &new, &old); + + if (setjmp(cmov_test_return) == 0) { + unsigned long foo = 0; + __asm__ __volatile__("cmovz %0, %1" : "=r" (foo) : "0" (foo)); + printk(UM_KERN_CONT "Yes\n"); + } else + printk(UM_KERN_CONT "No\n"); + + sigaction(SIGILL, &old, &new); +} + +void arch_examine_signal(int sig, struct uml_pt_regs *regs) +{ + unsigned char tmp[2]; + + /* + * This is testing for a cmov (0x0f 0x4x) instruction causing a + * SIGILL in init. + */ + if ((sig != SIGILL) || (TASK_PID(get_current()) != 1)) + return; + + if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2)) { + printk(UM_KERN_ERR "SIGILL in init, could not read " + "instructions!\n"); + return; + } + + if ((tmp[0] != 0x0f) || ((tmp[1] & 0xf0) != 0x40)) + return; + + if (host_has_cmov == 0) + printk(UM_KERN_ERR "SIGILL caused by cmov, which this " + "processor doesn't implement. Boot a filesystem " + "compiled for older processors"); + else if (host_has_cmov == 1) + printk(UM_KERN_ERR "SIGILL caused by cmov, which this " + "processor claims to implement"); + else + printk(UM_KERN_ERR "Bad value for host_has_cmov (%d)", + host_has_cmov); +} diff --git a/arch/um/sys-i386/checksum.S b/arch/um/sys-i386/checksum.S new file mode 100644 index 0000000..f058d2f --- /dev/null +++ b/arch/um/sys-i386/checksum.S @@ -0,0 +1,458 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IP/TCP/UDP checksumming routines + * + * Authors: Jorge Cwik, <jorge@laser.satlink.net> + * Arnt Gulbrandsen, <agulbra@nvg.unit.no> + * Tom May, <ftom@netcom.com> + * Pentium Pro/II routines: + * Alexander Kjeldaas <astor@guardian.no> + * Finn Arne Gangstad <finnag@guardian.no> + * Lots of code moved from tcp.c and ip.c; see those files + * for more names. 
+ * + * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception + * handling. + * Andi Kleen, add zeroing on error + * converted to pure assembler + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/errno.h> + +/* + * computes a partial checksum, e.g. for TCP/UDP fragments + */ + +/* +unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) + */ + +.text +.align 4 +.globl csum_partial + +#ifndef CONFIG_X86_USE_PPRO_CHECKSUM + + /* + * Experiments with Ethernet and SLIP connections show that buff + * is aligned on either a 2-byte or 4-byte boundary. We get at + * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. + * Fortunately, it is easy to convert 2-byte alignment to 4-byte + * alignment for the unrolled loop. + */ +csum_partial: + pushl %esi + pushl %ebx + movl 20(%esp),%eax # Function arg: unsigned int sum + movl 16(%esp),%ecx # Function arg: int len + movl 12(%esp),%esi # Function arg: unsigned char *buff + testl $2, %esi # Check alignment. + jz 2f # Jump if alignment is ok. + subl $2, %ecx # Alignment uses up two bytes. + jae 1f # Jump if we had at least two bytes. + addl $2, %ecx # ecx was < 2. Deal with it. 
+ jmp 4f +1: movw (%esi), %bx + addl $2, %esi + addw %bx, %ax + adcl $0, %eax +2: + movl %ecx, %edx + shrl $5, %ecx + jz 2f + testl %esi, %esi +1: movl (%esi), %ebx + adcl %ebx, %eax + movl 4(%esi), %ebx + adcl %ebx, %eax + movl 8(%esi), %ebx + adcl %ebx, %eax + movl 12(%esi), %ebx + adcl %ebx, %eax + movl 16(%esi), %ebx + adcl %ebx, %eax + movl 20(%esi), %ebx + adcl %ebx, %eax + movl 24(%esi), %ebx + adcl %ebx, %eax + movl 28(%esi), %ebx + adcl %ebx, %eax + lea 32(%esi), %esi + dec %ecx + jne 1b + adcl $0, %eax +2: movl %edx, %ecx + andl $0x1c, %edx + je 4f + shrl $2, %edx # This clears CF +3: adcl (%esi), %eax + lea 4(%esi), %esi + dec %edx + jne 3b + adcl $0, %eax +4: andl $3, %ecx + jz 7f + cmpl $2, %ecx + jb 5f + movw (%esi),%cx + leal 2(%esi),%esi + je 6f + shll $16,%ecx +5: movb (%esi),%cl +6: addl %ecx,%eax + adcl $0, %eax +7: + popl %ebx + popl %esi + ret + +#else + +/* Version for PentiumII/PPro */ + +csum_partial: + pushl %esi + pushl %ebx + movl 20(%esp),%eax # Function arg: unsigned int sum + movl 16(%esp),%ecx # Function arg: int len + movl 12(%esp),%esi # Function arg: const unsigned char *buf + + testl $2, %esi + jnz 30f +10: + movl %ecx, %edx + movl %ecx, %ebx + andl $0x7c, %ebx + shrl $7, %ecx + addl %ebx,%esi + shrl $2, %ebx + negl %ebx + lea 45f(%ebx,%ebx,2), %ebx + testl %esi, %esi + jmp *%ebx + + # Handle 2-byte-aligned regions +20: addw (%esi), %ax + lea 2(%esi), %esi + adcl $0, %eax + jmp 10b + +30: subl $2, %ecx + ja 20b + je 32f + movzbl (%esi),%ebx # csumming 1 byte, 2-aligned + addl %ebx, %eax + adcl $0, %eax + jmp 80f +32: + addw (%esi), %ax # csumming 2 bytes, 2-aligned + adcl $0, %eax + jmp 80f + +40: + addl -128(%esi), %eax + adcl -124(%esi), %eax + adcl -120(%esi), %eax + adcl -116(%esi), %eax + adcl -112(%esi), %eax + adcl -108(%esi), %eax + adcl -104(%esi), %eax + adcl -100(%esi), %eax + adcl -96(%esi), %eax + adcl -92(%esi), %eax + adcl -88(%esi), %eax + adcl -84(%esi), %eax + adcl -80(%esi), %eax + adcl -76(%esi), %eax + adcl 
-72(%esi), %eax + adcl -68(%esi), %eax + adcl -64(%esi), %eax + adcl -60(%esi), %eax + adcl -56(%esi), %eax + adcl -52(%esi), %eax + adcl -48(%esi), %eax + adcl -44(%esi), %eax + adcl -40(%esi), %eax + adcl -36(%esi), %eax + adcl -32(%esi), %eax + adcl -28(%esi), %eax + adcl -24(%esi), %eax + adcl -20(%esi), %eax + adcl -16(%esi), %eax + adcl -12(%esi), %eax + adcl -8(%esi), %eax + adcl -4(%esi), %eax +45: + lea 128(%esi), %esi + adcl $0, %eax + dec %ecx + jge 40b + movl %edx, %ecx +50: andl $3, %ecx + jz 80f + + # Handle the last 1-3 bytes without jumping + notl %ecx # 1->2, 2->1, 3->0, higher bits are masked + movl $0xffffff,%ebx # by the shll and shrl instructions + shll $3,%ecx + shrl %cl,%ebx + andl -128(%esi),%ebx # esi is 4-aligned so should be ok + addl %ebx,%eax + adcl $0,%eax +80: + popl %ebx + popl %esi + ret + +#endif + +/* +unsigned int csum_partial_copy_generic (const char *src, char *dst, + int len, int sum, int *src_err_ptr, int *dst_err_ptr) + */ + +/* + * Copy from ds while checksumming, otherwise like csum_partial + * + * The macros SRC and DST specify the type of access for the instruction. + * thus we can call a custom exception handler for all access types. + * + * FIXME: could someone double-check whether I haven't mixed up some SRC and + * DST definitions? It's damn hard to trigger all cases. I hope I got + * them all but there's no guarantee. + */ + +#define SRC(y...) \ + 9999: y; \ + .section __ex_table, "a"; \ + .long 9999b, 6001f ; \ + .previous + +#define DST(y...) \ + 9999: y; \ + .section __ex_table, "a"; \ + .long 9999b, 6002f ; \ + .previous + +.align 4 + +#ifndef CONFIG_X86_USE_PPRO_CHECKSUM + +#define ARGBASE 16 +#define FP 12 + +csum_partial_copy_generic_i386: + subl $4,%esp + pushl %edi + pushl %esi + pushl %ebx + movl ARGBASE+16(%esp),%eax # sum + movl ARGBASE+12(%esp),%ecx # len + movl ARGBASE+4(%esp),%esi # src + movl ARGBASE+8(%esp),%edi # dst + + testl $2, %edi # Check alignment. + jz 2f # Jump if alignment is ok. 
+ subl $2, %ecx # Alignment uses up two bytes. + jae 1f # Jump if we had at least two bytes. + addl $2, %ecx # ecx was < 2. Deal with it. + jmp 4f +SRC(1: movw (%esi), %bx ) + addl $2, %esi +DST( movw %bx, (%edi) ) + addl $2, %edi + addw %bx, %ax + adcl $0, %eax +2: + movl %ecx, FP(%esp) + shrl $5, %ecx + jz 2f + testl %esi, %esi +SRC(1: movl (%esi), %ebx ) +SRC( movl 4(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, (%edi) ) + adcl %edx, %eax +DST( movl %edx, 4(%edi) ) + +SRC( movl 8(%esi), %ebx ) +SRC( movl 12(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 8(%edi) ) + adcl %edx, %eax +DST( movl %edx, 12(%edi) ) + +SRC( movl 16(%esi), %ebx ) +SRC( movl 20(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 16(%edi) ) + adcl %edx, %eax +DST( movl %edx, 20(%edi) ) + +SRC( movl 24(%esi), %ebx ) +SRC( movl 28(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 24(%edi) ) + adcl %edx, %eax +DST( movl %edx, 28(%edi) ) + + lea 32(%esi), %esi + lea 32(%edi), %edi + dec %ecx + jne 1b + adcl $0, %eax +2: movl FP(%esp), %edx + movl %edx, %ecx + andl $0x1c, %edx + je 4f + shrl $2, %edx # This clears CF +SRC(3: movl (%esi), %ebx ) + adcl %ebx, %eax +DST( movl %ebx, (%edi) ) + lea 4(%esi), %esi + lea 4(%edi), %edi + dec %edx + jne 3b + adcl $0, %eax +4: andl $3, %ecx + jz 7f + cmpl $2, %ecx + jb 5f +SRC( movw (%esi), %cx ) + leal 2(%esi), %esi +DST( movw %cx, (%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%ecx +SRC(5: movb (%esi), %cl ) +DST( movb %cl, (%edi) ) +6: addl %ecx, %eax + adcl $0, %eax +7: +5000: + +# Exception handler: +.section .fixup, "ax" + +6001: + movl ARGBASE+20(%esp), %ebx # src_err_ptr + movl $-EFAULT, (%ebx) + + # zero the complete destination - computing the rest + # is too much work + movl ARGBASE+8(%esp), %edi # dst + movl ARGBASE+12(%esp), %ecx # len + xorl %eax,%eax + rep ; stosb + + jmp 5000b + +6002: + movl ARGBASE+24(%esp), %ebx # dst_err_ptr + movl $-EFAULT,(%ebx) + jmp 5000b + +.previous + + popl %ebx + popl %esi + popl %edi + popl %ecx # 
equivalent to addl $4,%esp + ret + +#else + +/* Version for PentiumII/PPro */ + +#define ROUND1(x) \ + SRC(movl x(%esi), %ebx ) ; \ + addl %ebx, %eax ; \ + DST(movl %ebx, x(%edi) ) ; + +#define ROUND(x) \ + SRC(movl x(%esi), %ebx ) ; \ + adcl %ebx, %eax ; \ + DST(movl %ebx, x(%edi) ) ; + +#define ARGBASE 12 + +csum_partial_copy_generic_i386: + pushl %ebx + pushl %edi + pushl %esi + movl ARGBASE+4(%esp),%esi #src + movl ARGBASE+8(%esp),%edi #dst + movl ARGBASE+12(%esp),%ecx #len + movl ARGBASE+16(%esp),%eax #sum +# movl %ecx, %edx + movl %ecx, %ebx + movl %esi, %edx + shrl $6, %ecx + andl $0x3c, %ebx + negl %ebx + subl %ebx, %esi + subl %ebx, %edi + lea -1(%esi),%edx + andl $-32,%edx + lea 3f(%ebx,%ebx), %ebx + testl %esi, %esi + jmp *%ebx +1: addl $64,%esi + addl $64,%edi + SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) + ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) + ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) + ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) + ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4) +3: adcl $0,%eax + addl $64, %edx + dec %ecx + jge 1b +4: movl ARGBASE+12(%esp),%edx #len + andl $3, %edx + jz 7f + cmpl $2, %edx + jb 5f +SRC( movw (%esi), %dx ) + leal 2(%esi), %esi +DST( movw %dx, (%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%edx +5: +SRC( movb (%esi), %dl ) +DST( movb %dl, (%edi) ) +6: addl %edx, %eax + adcl $0, %eax +7: +.section .fixup, "ax" +6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr + movl $-EFAULT, (%ebx) + # zero the complete destination (computing the rest is too much work) + movl ARGBASE+8(%esp),%edi # dst + movl ARGBASE+12(%esp),%ecx # len + xorl %eax,%eax + rep; stosb + jmp 7b +6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr + movl $-EFAULT, (%ebx) + jmp 7b +.previous + + popl %esi + popl %edi + popl %ebx + ret + +#undef ROUND +#undef ROUND1 + +#endif diff --git a/arch/um/sys-i386/delay.c b/arch/um/sys-i386/delay.c new file mode 100644 index 0000000..d623e07 --- /dev/null +++ b/arch/um/sys-i386/delay.c @@ -0,0 +1,29 @@ 
+#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <asm/param.h> + +void __delay(unsigned long time) +{ + /* Stolen from the i386 __loop_delay */ + int d0; + __asm__ __volatile__( + "\tjmp 1f\n" + ".align 16\n" + "1:\tjmp 2f\n" + ".align 16\n" + "2:\tdecl %0\n\tjns 2b" + :"=&a" (d0) + :"0" (time)); +} + +void __udelay(unsigned long usecs) +{ + int i, n; + + n = (loops_per_jiffy * HZ * usecs) / MILLION; + for(i=0;i<n;i++) + cpu_relax(); +} + +EXPORT_SYMBOL(__udelay); diff --git a/arch/um/sys-i386/fault.c b/arch/um/sys-i386/fault.c new file mode 100644 index 0000000..d670f68 --- /dev/null +++ b/arch/um/sys-i386/fault.c @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "sysdep/ptrace.h" + +/* These two are from asm-um/uaccess.h and linux/module.h, check them. */ +struct exception_table_entry +{ + unsigned long insn; + unsigned long fixup; +}; + +const struct exception_table_entry *search_exception_tables(unsigned long add); + +/* Compare this to arch/i386/mm/extable.c:fixup_exception() */ +int arch_fixup(unsigned long address, struct uml_pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + fixup = search_exception_tables(address); + if (fixup != 0) { + UPT_IP(regs) = fixup->fixup; + return 1; + } + return 0; +} diff --git a/arch/um/sys-i386/ksyms.c b/arch/um/sys-i386/ksyms.c new file mode 100644 index 0000000..bfbefd3 --- /dev/null +++ b/arch/um/sys-i386/ksyms.c @@ -0,0 +1,5 @@ +#include "linux/module.h" +#include "asm/checksum.h" + +/* Networking helper routines. 
*/ +EXPORT_SYMBOL(csum_partial); diff --git a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c new file mode 100644 index 0000000..a4846a8 --- /dev/null +++ b/arch/um/sys-i386/ldt.c @@ -0,0 +1,501 @@ +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/mm.h> +#include <linux/sched.h> +#include <asm/unistd.h> +#include "os.h" +#include "proc_mm.h" +#include "skas.h" +#include "skas_ptrace.h" +#include "sysdep/tls.h" + +extern int modify_ldt(int func, void *ptr, unsigned long bytecount); + +static long write_ldt_entry(struct mm_id *mm_idp, int func, + struct user_desc *desc, void **addr, int done) +{ + long res; + + if (proc_mm) { + /* + * This is a special handling for the case, that the mm to + * modify isn't current->active_mm. + * If this is called directly by modify_ldt, + * (current->active_mm->context.skas.u == mm_idp) + * will be true. So no call to __switch_mm(mm_idp) is done. + * If this is called in case of init_new_ldt or PTRACE_LDT, + * mm_idp won't belong to current->active_mm, but child->mm. + * So we need to switch child's mm into our userspace, then + * later switch back. + * + * Note: I'm unsure: should interrupts be disabled here? 
+ */ + if (!current->active_mm || current->active_mm == &init_mm || + mm_idp != ¤t->active_mm->context.id) + __switch_mm(mm_idp); + } + + if (ptrace_ldt) { + struct ptrace_ldt ldt_op = (struct ptrace_ldt) { + .func = func, + .ptr = desc, + .bytecount = sizeof(*desc)}; + u32 cpu; + int pid; + + if (!proc_mm) + pid = mm_idp->u.pid; + else { + cpu = get_cpu(); + pid = userspace_pid[cpu]; + } + + res = os_ptrace_ldt(pid, 0, (unsigned long) &ldt_op); + + if (proc_mm) + put_cpu(); + } + else { + void *stub_addr; + res = syscall_stub_data(mm_idp, (unsigned long *)desc, + (sizeof(*desc) + sizeof(long) - 1) & + ~(sizeof(long) - 1), + addr, &stub_addr); + if (!res) { + unsigned long args[] = { func, + (unsigned long)stub_addr, + sizeof(*desc), + 0, 0, 0 }; + res = run_syscall_stub(mm_idp, __NR_modify_ldt, args, + 0, addr, done); + } + } + + if (proc_mm) { + /* + * This is the second part of special handling, that makes + * PTRACE_LDT possible to implement. + */ + if (current->active_mm && current->active_mm != &init_mm && + mm_idp != ¤t->active_mm->context.id) + __switch_mm(¤t->active_mm->context.id); + } + + return res; +} + +static long read_ldt_from_host(void __user * ptr, unsigned long bytecount) +{ + int res, n; + struct ptrace_ldt ptrace_ldt = (struct ptrace_ldt) { + .func = 0, + .bytecount = bytecount, + .ptr = kmalloc(bytecount, GFP_KERNEL)}; + u32 cpu; + + if (ptrace_ldt.ptr == NULL) + return -ENOMEM; + + /* + * This is called from sys_modify_ldt only, so userspace_pid gives + * us the right number + */ + + cpu = get_cpu(); + res = os_ptrace_ldt(userspace_pid[cpu], 0, (unsigned long) &ptrace_ldt); + put_cpu(); + if (res < 0) + goto out; + + n = copy_to_user(ptr, ptrace_ldt.ptr, res); + if (n != 0) + res = -EFAULT; + + out: + kfree(ptrace_ldt.ptr); + + return res; +} + +/* + * In skas mode, we hold our own ldt data in UML. 
+ * Thus, the code implementing sys_modify_ldt_skas + * is very similar to (and mostly stolen from) sys_modify_ldt + * for arch/i386/kernel/ldt.c + * The routines copied and modified in part are: + * - read_ldt + * - read_default_ldt + * - write_ldt + * - sys_modify_ldt_skas + */ + +static int read_ldt(void __user * ptr, unsigned long bytecount) +{ + int i, err = 0; + unsigned long size; + uml_ldt_t * ldt = ¤t->mm->context.ldt; + + if (!ldt->entry_count) + goto out; + if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) + bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; + err = bytecount; + + if (ptrace_ldt) + return read_ldt_from_host(ptr, bytecount); + + mutex_lock(&ldt->lock); + if (ldt->entry_count <= LDT_DIRECT_ENTRIES) { + size = LDT_ENTRY_SIZE*LDT_DIRECT_ENTRIES; + if (size > bytecount) + size = bytecount; + if (copy_to_user(ptr, ldt->u.entries, size)) + err = -EFAULT; + bytecount -= size; + ptr += size; + } + else { + for (i=0; i<ldt->entry_count/LDT_ENTRIES_PER_PAGE && bytecount; + i++) { + size = PAGE_SIZE; + if (size > bytecount) + size = bytecount; + if (copy_to_user(ptr, ldt->u.pages[i], size)) { + err = -EFAULT; + break; + } + bytecount -= size; + ptr += size; + } + } + mutex_unlock(&ldt->lock); + + if (bytecount == 0 || err == -EFAULT) + goto out; + + if (clear_user(ptr, bytecount)) + err = -EFAULT; + +out: + return err; +} + +static int read_default_ldt(void __user * ptr, unsigned long bytecount) +{ + int err; + + if (bytecount > 5*LDT_ENTRY_SIZE) + bytecount = 5*LDT_ENTRY_SIZE; + + err = bytecount; + /* + * UML doesn't support lcall7 and lcall27. + * So, we don't really have a default ldt, but emulate + * an empty ldt of common host default ldt size. 
+ */ + if (clear_user(ptr, bytecount)) + err = -EFAULT; + + return err; +} + +static int write_ldt(void __user * ptr, unsigned long bytecount, int func) +{ + uml_ldt_t * ldt = ¤t->mm->context.ldt; + struct mm_id * mm_idp = ¤t->mm->context.id; + int i, err; + struct user_desc ldt_info; + struct ldt_entry entry0, *ldt_p; + void *addr = NULL; + + err = -EINVAL; + if (bytecount != sizeof(ldt_info)) + goto out; + err = -EFAULT; + if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) + goto out; + + err = -EINVAL; + if (ldt_info.entry_number >= LDT_ENTRIES) + goto out; + if (ldt_info.contents == 3) { + if (func == 1) + goto out; + if (ldt_info.seg_not_present == 0) + goto out; + } + + if (!ptrace_ldt) + mutex_lock(&ldt->lock); + + err = write_ldt_entry(mm_idp, func, &ldt_info, &addr, 1); + if (err) + goto out_unlock; + else if (ptrace_ldt) { + /* With PTRACE_LDT available, this is used as a flag only */ + ldt->entry_count = 1; + goto out; + } + + if (ldt_info.entry_number >= ldt->entry_count && + ldt_info.entry_number >= LDT_DIRECT_ENTRIES) { + for (i=ldt->entry_count/LDT_ENTRIES_PER_PAGE; + i*LDT_ENTRIES_PER_PAGE <= ldt_info.entry_number; + i++) { + if (i == 0) + memcpy(&entry0, ldt->u.entries, + sizeof(entry0)); + ldt->u.pages[i] = (struct ldt_entry *) + __get_free_page(GFP_KERNEL|__GFP_ZERO); + if (!ldt->u.pages[i]) { + err = -ENOMEM; + /* Undo the change in host */ + memset(&ldt_info, 0, sizeof(ldt_info)); + write_ldt_entry(mm_idp, 1, &ldt_info, &addr, 1); + goto out_unlock; + } + if (i == 0) { + memcpy(ldt->u.pages[0], &entry0, + sizeof(entry0)); + memcpy(ldt->u.pages[0]+1, ldt->u.entries+1, + sizeof(entry0)*(LDT_DIRECT_ENTRIES-1)); + } + ldt->entry_count = (i + 1) * LDT_ENTRIES_PER_PAGE; + } + } + if (ldt->entry_count <= ldt_info.entry_number) + ldt->entry_count = ldt_info.entry_number + 1; + + if (ldt->entry_count <= LDT_DIRECT_ENTRIES) + ldt_p = ldt->u.entries + ldt_info.entry_number; + else + ldt_p = ldt->u.pages[ldt_info.entry_number/LDT_ENTRIES_PER_PAGE] + + 
ldt_info.entry_number%LDT_ENTRIES_PER_PAGE; + + if (ldt_info.base_addr == 0 && ldt_info.limit == 0 && + (func == 1 || LDT_empty(&ldt_info))) { + ldt_p->a = 0; + ldt_p->b = 0; + } + else{ + if (func == 1) + ldt_info.useable = 0; + ldt_p->a = LDT_entry_a(&ldt_info); + ldt_p->b = LDT_entry_b(&ldt_info); + } + err = 0; + +out_unlock: + mutex_unlock(&ldt->lock); +out: + return err; +} + +static long do_modify_ldt_skas(int func, void __user *ptr, + unsigned long bytecount) +{ + int ret = -ENOSYS; + + switch (func) { + case 0: + ret = read_ldt(ptr, bytecount); + break; + case 1: + case 0x11: + ret = write_ldt(ptr, bytecount, func); + break; + case 2: + ret = read_default_ldt(ptr, bytecount); + break; + } + return ret; +} + +static DEFINE_SPINLOCK(host_ldt_lock); +static short dummy_list[9] = {0, -1}; +static short * host_ldt_entries = NULL; + +static void ldt_get_host_info(void) +{ + long ret; + struct ldt_entry * ldt; + short *tmp; + int i, size, k, order; + + spin_lock(&host_ldt_lock); + + if (host_ldt_entries != NULL) { + spin_unlock(&host_ldt_lock); + return; + } + host_ldt_entries = dummy_list+1; + + spin_unlock(&host_ldt_lock); + + for (i = LDT_PAGES_MAX-1, order=0; i; i>>=1, order++) + ; + + ldt = (struct ldt_entry *) + __get_free_pages(GFP_KERNEL|__GFP_ZERO, order); + if (ldt == NULL) { + printk(KERN_ERR "ldt_get_host_info: couldn't allocate buffer " + "for host ldt\n"); + return; + } + + ret = modify_ldt(0, ldt, (1<<order)*PAGE_SIZE); + if (ret < 0) { + printk(KERN_ERR "ldt_get_host_info: couldn't read host ldt\n"); + goto out_free; + } + if (ret == 0) { + /* default_ldt is active, simply write an empty entry 0 */ + host_ldt_entries = dummy_list; + goto out_free; + } + + for (i=0, size=0; i<ret/LDT_ENTRY_SIZE; i++) { + if (ldt[i].a != 0 || ldt[i].b != 0) + size++; + } + + if (size < ARRAY_SIZE(dummy_list)) + host_ldt_entries = dummy_list; + else { + size = (size + 1) * sizeof(dummy_list[0]); + tmp = kmalloc(size, GFP_KERNEL); + if (tmp == NULL) { + 
printk(KERN_ERR "ldt_get_host_info: couldn't allocate " + "host ldt list\n"); + goto out_free; + } + host_ldt_entries = tmp; + } + + for (i=0, k=0; i<ret/LDT_ENTRY_SIZE; i++) { + if (ldt[i].a != 0 || ldt[i].b != 0) + host_ldt_entries[k++] = i; + } + host_ldt_entries[k] = -1; + +out_free: + free_pages((unsigned long)ldt, order); +} + +long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm) +{ + struct user_desc desc; + short * num_p; + int i; + long page, err=0; + void *addr = NULL; + struct proc_mm_op copy; + + + if (!ptrace_ldt) + mutex_init(&new_mm->ldt.lock); + + if (!from_mm) { + memset(&desc, 0, sizeof(desc)); + /* + * We have to initialize a clean ldt. + */ + if (proc_mm) { + /* + * If the new mm was created using proc_mm, host's + * default-ldt currently is assigned, which normally + * contains the call-gates for lcall7 and lcall27. + * To remove these gates, we simply write an empty + * entry as number 0 to the host. + */ + err = write_ldt_entry(&new_mm->id, 1, &desc, &addr, 1); + } + else{ + /* + * Now we try to retrieve info about the ldt, we + * inherited from the host. 
All ldt-entries found + * will be reset in the following loop + */ + ldt_get_host_info(); + for (num_p=host_ldt_entries; *num_p != -1; num_p++) { + desc.entry_number = *num_p; + err = write_ldt_entry(&new_mm->id, 1, &desc, + &addr, *(num_p + 1) == -1); + if (err) + break; + } + } + new_mm->ldt.entry_count = 0; + + goto out; + } + + if (proc_mm) { + /* + * We have a valid from_mm, so we now have to copy the LDT of + * from_mm to new_mm, because using proc_mm an new mm with + * an empty/default LDT was created in new_mm() + */ + copy = ((struct proc_mm_op) { .op = MM_COPY_SEGMENTS, + .u = + { .copy_segments = + from_mm->id.u.mm_fd } } ); + i = os_write_file(new_mm->id.u.mm_fd, ©, sizeof(copy)); + if (i != sizeof(copy)) + printk(KERN_ERR "new_mm : /proc/mm copy_segments " + "failed, err = %d\n", -i); + } + + if (!ptrace_ldt) { + /* + * Our local LDT is used to supply the data for + * modify_ldt(READLDT), if PTRACE_LDT isn't available, + * i.e., we have to use the stub for modify_ldt, which + * can't handle the big read buffer of up to 64kB. 
+ */ + mutex_lock(&from_mm->ldt.lock); + if (from_mm->ldt.entry_count <= LDT_DIRECT_ENTRIES) + memcpy(new_mm->ldt.u.entries, from_mm->ldt.u.entries, + sizeof(new_mm->ldt.u.entries)); + else { + i = from_mm->ldt.entry_count / LDT_ENTRIES_PER_PAGE; + while (i-->0) { + page = __get_free_page(GFP_KERNEL|__GFP_ZERO); + if (!page) { + err = -ENOMEM; + break; + } + new_mm->ldt.u.pages[i] = + (struct ldt_entry *) page; + memcpy(new_mm->ldt.u.pages[i], + from_mm->ldt.u.pages[i], PAGE_SIZE); + } + } + new_mm->ldt.entry_count = from_mm->ldt.entry_count; + mutex_unlock(&from_mm->ldt.lock); + } + + out: + return err; +} + + +void free_ldt(struct mm_context *mm) +{ + int i; + + if (!ptrace_ldt && mm->ldt.entry_count > LDT_DIRECT_ENTRIES) { + i = mm->ldt.entry_count / LDT_ENTRIES_PER_PAGE; + while (i-- > 0) + free_page((long) mm->ldt.u.pages[i]); + } + mm->ldt.entry_count = 0; +} + +int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) +{ + return do_modify_ldt_skas(func, ptr, bytecount); +} diff --git a/arch/um/sys-i386/ptrace.c b/arch/um/sys-i386/ptrace.c new file mode 100644 index 0000000..c9b1765 --- /dev/null +++ b/arch/um/sys-i386/ptrace.c @@ -0,0 +1,210 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "linux/mm.h" +#include "linux/sched.h" +#include "asm/uaccess.h" +#include "skas.h" + +extern int arch_switch_tls(struct task_struct *to); + +void arch_switch_to(struct task_struct *to) +{ + int err = arch_switch_tls(to); + if (!err) + return; + + if (err != -EINVAL) + printk(KERN_WARNING "arch_switch_tls failed, errno %d, " + "not EINVAL\n", -err); + else + printk(KERN_WARNING "arch_switch_tls failed, errno = EINVAL\n"); +} + +int is_syscall(unsigned long addr) +{ + unsigned short instr; + int n; + + n = copy_from_user(&instr, (void __user *) addr, sizeof(instr)); + if (n) { + /* access_process_vm() grants access to vsyscall and stub, + * while copy_from_user doesn't. 
Maybe access_process_vm is + * slow, but that doesn't matter, since it will be called only + * in case of singlestepping, if copy_from_user failed. + */ + n = access_process_vm(current, addr, &instr, sizeof(instr), 0); + if (n != sizeof(instr)) { + printk(KERN_ERR "is_syscall : failed to read " + "instruction from 0x%lx\n", addr); + return 1; + } + } + /* int 0x80 or sysenter */ + return (instr == 0x80cd) || (instr == 0x340f); +} + +/* determines which flags the user has access to. */ +/* 1 = access 0 = no access */ +#define FLAG_MASK 0x00044dd5 + +int putreg(struct task_struct *child, int regno, unsigned long value) +{ + regno >>= 2; + switch (regno) { + case FS: + if (value && (value & 3) != 3) + return -EIO; + PT_REGS_FS(&child->thread.regs) = value; + return 0; + case GS: + if (value && (value & 3) != 3) + return -EIO; + PT_REGS_GS(&child->thread.regs) = value; + return 0; + case DS: + case ES: + if (value && (value & 3) != 3) + return -EIO; + value &= 0xffff; + break; + case SS: + case CS: + if ((value & 3) != 3) + return -EIO; + value &= 0xffff; + break; + case EFL: + value &= FLAG_MASK; + value |= PT_REGS_EFLAGS(&child->thread.regs); + break; + } + PT_REGS_SET(&child->thread.regs, regno, value); + return 0; +} + +int poke_user(struct task_struct *child, long addr, long data) +{ + if ((addr & 3) || addr < 0) + return -EIO; + + if (addr < MAX_REG_OFFSET) + return putreg(child, addr, data); + else if ((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))) { + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + if ((addr == 4) || (addr == 5)) + return -EIO; + child->thread.arch.debugregs[addr] = data; + return 0; + } + return -EIO; +} + +unsigned long getreg(struct task_struct *child, int regno) +{ + unsigned long retval = ~0UL; + + regno >>= 2; + switch (regno) { + case FS: + case GS: + case DS: + case ES: + case SS: + case CS: + retval = 0xffff; + /* fall through */ + default: + retval &= 
PT_REG(&child->thread.regs, regno); + } + return retval; +} + +/* read the word at location addr in the USER area. */ +int peek_user(struct task_struct *child, long addr, long data) +{ + unsigned long tmp; + + if ((addr & 3) || addr < 0) + return -EIO; + + tmp = 0; /* Default return condition */ + if (addr < MAX_REG_OFFSET) { + tmp = getreg(child, addr); + } + else if ((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))) { + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + tmp = child->thread.arch.debugregs[addr]; + } + return put_user(tmp, (unsigned long __user *) data); +} + +int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) +{ + int err, n, cpu = ((struct thread_info *) child->stack)->cpu; + struct user_i387_struct fpregs; + + err = save_fp_registers(userspace_pid[cpu], (unsigned long *) &fpregs); + if (err) + return err; + + n = copy_to_user(buf, &fpregs, sizeof(fpregs)); + if(n > 0) + return -EFAULT; + + return n; +} + +int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) +{ + int n, cpu = ((struct thread_info *) child->stack)->cpu; + struct user_i387_struct fpregs; + + n = copy_from_user(&fpregs, buf, sizeof(fpregs)); + if (n > 0) + return -EFAULT; + + return restore_fp_registers(userspace_pid[cpu], + (unsigned long *) &fpregs); +} + +int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) +{ + int err, n, cpu = ((struct thread_info *) child->stack)->cpu; + struct user_fxsr_struct fpregs; + + err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs); + if (err) + return err; + + n = copy_to_user(buf, &fpregs, sizeof(fpregs)); + if(n > 0) + return -EFAULT; + + return n; +} + +int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child) +{ + int n, cpu = ((struct thread_info *) child->stack)->cpu; + struct user_fxsr_struct fpregs; + + n = copy_from_user(&fpregs, buf, sizeof(fpregs)); + 
if (n > 0) + return -EFAULT; + + return restore_fpx_registers(userspace_pid[cpu], + (unsigned long *) &fpregs); +} + +long subarch_ptrace(struct task_struct *child, long request, long addr, + long data) +{ + return -EIO; +} diff --git a/arch/um/sys-i386/ptrace_user.c b/arch/um/sys-i386/ptrace_user.c new file mode 100644 index 0000000..0b10c3e --- /dev/null +++ b/arch/um/sys-i386/ptrace_user.c @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <errno.h> +#include <sys/ptrace.h> + +int ptrace_getregs(long pid, unsigned long *regs_out) +{ + if (ptrace(PTRACE_GETREGS, pid, 0, regs_out) < 0) + return -errno; + return 0; +} + +int ptrace_setregs(long pid, unsigned long *regs) +{ + if (ptrace(PTRACE_SETREGS, pid, 0, regs) < 0) + return -errno; + return 0; +} diff --git a/arch/um/sys-i386/setjmp.S b/arch/um/sys-i386/setjmp.S new file mode 100644 index 0000000..b766792 --- /dev/null +++ b/arch/um/sys-i386/setjmp.S @@ -0,0 +1,58 @@ +# +# arch/i386/setjmp.S +# +# setjmp/longjmp for the i386 architecture +# + +# +# The jmp_buf is assumed to contain the following, in order: +# %ebx +# %esp +# %ebp +# %esi +# %edi +# <return address> +# + + .text + .align 4 + .globl setjmp + .type setjmp, @function +setjmp: +#ifdef _REGPARM + movl %eax,%edx +#else + movl 4(%esp),%edx +#endif + popl %ecx # Return address, and adjust the stack + xorl %eax,%eax # Return value + movl %ebx,(%edx) + movl %esp,4(%edx) # Post-return %esp! 
+ pushl %ecx # Make the call/return stack happy + movl %ebp,8(%edx) + movl %esi,12(%edx) + movl %edi,16(%edx) + movl %ecx,20(%edx) # Return address + ret + + .size setjmp,.-setjmp + + .text + .align 4 + .globl longjmp + .type longjmp, @function +longjmp: +#ifdef _REGPARM + xchgl %eax,%edx +#else + movl 4(%esp),%edx # jmp_ptr address + movl 8(%esp),%eax # Return value +#endif + movl (%edx),%ebx + movl 4(%edx),%esp + movl 8(%edx),%ebp + movl 12(%edx),%esi + movl 16(%edx),%edi + jmp *20(%edx) + + .size longjmp,.-longjmp diff --git a/arch/um/sys-i386/shared/sysdep/archsetjmp.h b/arch/um/sys-i386/shared/sysdep/archsetjmp.h new file mode 100644 index 0000000..0f31208 --- /dev/null +++ b/arch/um/sys-i386/shared/sysdep/archsetjmp.h @@ -0,0 +1,22 @@ +/* + * arch/um/include/sysdep-i386/archsetjmp.h + */ + +#ifndef _KLIBC_ARCHSETJMP_H +#define _KLIBC_ARCHSETJMP_H + +struct __jmp_buf { + unsigned int __ebx; + unsigned int __esp; + unsigned int __ebp; + unsigned int __esi; + unsigned int __edi; + unsigned int __eip; +}; + +typedef struct __jmp_buf jmp_buf[1]; + +#define JB_IP __eip +#define JB_SP __esp + +#endif /* _SETJMP_H */ diff --git a/arch/um/sys-i386/shared/sysdep/barrier.h b/arch/um/sys-i386/shared/sysdep/barrier.h new file mode 100644 index 0000000..b58d52c --- /dev/null +++ b/arch/um/sys-i386/shared/sysdep/barrier.h @@ -0,0 +1,9 @@ +#ifndef __SYSDEP_I386_BARRIER_H +#define __SYSDEP_I386_BARRIER_H + +/* Copied from include/asm-i386 for use by userspace. i386 has the option + * of using mfence, but I'm just using this, which works everywhere, for now. 
+ */ +#define mb() asm volatile("lock; addl $0,0(%esp)") + +#endif diff --git a/arch/um/sys-i386/shared/sysdep/checksum.h b/arch/um/sys-i386/shared/sysdep/checksum.h new file mode 100644 index 0000000..0cb4645 --- /dev/null +++ b/arch/um/sys-i386/shared/sysdep/checksum.h @@ -0,0 +1,211 @@ +/* + * Licensed under the GPL + */ + +#ifndef __UM_SYSDEP_CHECKSUM_H +#define __UM_SYSDEP_CHECKSUM_H + +#include "linux/in6.h" +#include "linux/string.h" + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +__wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * Note: when you get a NULL pointer exception here this means someone + * passed in an incorrect kernel address to one of these functions. + * + * If you use these functions directly please don't forget the + * access_ok(). + */ + +static __inline__ +__wsum csum_partial_copy_nocheck(const void *src, void *dst, + int len, __wsum sum) +{ + memcpy(dst, src, len); + return csum_partial(dst, len, sum); +} + +/* + * the same as csum_partial, but copies from src while it + * checksums, and handles user-space pointer exceptions correctly, when needed. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ + +static __inline__ +__wsum csum_partial_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *err_ptr) +{ + if (copy_from_user(dst, src, len)) { + *err_ptr = -EFAULT; + return (__force __wsum)-1; + } + + return csum_partial(dst, len, sum); +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. 
+ * + * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by + * Arnt Gulbrandsen. + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned int sum; + + __asm__ __volatile__( + "movl (%1), %0 ;\n" + "subl $4, %2 ;\n" + "jbe 2f ;\n" + "addl 4(%1), %0 ;\n" + "adcl 8(%1), %0 ;\n" + "adcl 12(%1), %0 ;\n" +"1: adcl 16(%1), %0 ;\n" + "lea 4(%1), %1 ;\n" + "decl %2 ;\n" + "jne 1b ;\n" + "adcl $0, %0 ;\n" + "movl %0, %2 ;\n" + "shrl $16, %0 ;\n" + "addw %w2, %w0 ;\n" + "adcl $0, %0 ;\n" + "notl %0 ;\n" +"2: ;\n" + /* Since the input registers which are loaded with iph and ihl + are modified, we must also specify them as outputs, or gcc + will assume they contain their original values. */ + : "=r" (sum), "=r" (iph), "=r" (ihl) + : "1" (iph), "2" (ihl) + : "memory"); + return (__force __sum16)sum; +} + +/* + * Fold a partial checksum + */ + +static inline __sum16 csum_fold(__wsum sum) +{ + __asm__( + "addl %1, %0 ;\n" + "adcl $0xffff, %0 ;\n" + : "=r" (sum) + : "r" ((__force u32)sum << 16), + "0" ((__force u32)sum & 0xffff0000) + ); + return (__force __sum16)(~(__force u32)sum >> 16); +} + +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + __asm__( + "addl %1, %0 ;\n" + "adcl %2, %0 ;\n" + "adcl %3, %0 ;\n" + "adcl $0, %0 ;\n" + : "=r" (sum) + : "g" (daddr), "g"(saddr), "g"((len + proto) << 8), "0"(sum)); + return sum; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ + +static inline __sum16 ip_compute_csum(const void *buff, int len) +{ + return csum_fold (csum_partial(buff, len, 0)); +} +
+#define _HAVE_ARCH_IPV6_CSUM +static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, unsigned short proto, + __wsum sum) +{ + __asm__( + "addl 0(%1), %0 ;\n" + "adcl 4(%1), %0 ;\n" + "adcl 8(%1), %0 ;\n" + "adcl 12(%1), %0 ;\n" + "adcl 0(%2), %0 ;\n" + "adcl 4(%2), %0 ;\n" + "adcl 8(%2), %0 ;\n" + "adcl 12(%2), %0 ;\n" + "adcl %3, %0 ;\n" + "adcl %4, %0 ;\n" + "adcl $0, %0 ;\n" + : "=&r" (sum) + : "r" (saddr), "r" (daddr), + "r"(htonl(len)), "r"(htonl(proto)), "0"(sum)); + + return csum_fold(sum); +} + +/* + * Copy and checksum to user + */ +#define HAVE_CSUM_COPY_USER +static __inline__ __wsum csum_and_copy_to_user(const void *src, + void __user *dst, + int len, __wsum sum, int *err_ptr) +{ + if (access_ok(VERIFY_WRITE, dst, len)) { + if (copy_to_user(dst, src, len)) { + *err_ptr = -EFAULT; + return (__force __wsum)-1; + } + + return csum_partial(src, len, sum); + } + + if (len) + *err_ptr = -EFAULT; + + return (__force __wsum)-1; /* invalid checksum */ +} + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/sys-i386/shared/sysdep/faultinfo.h b/arch/um/sys-i386/shared/sysdep/faultinfo.h new file mode 100644 index 0000000..db437cc --- /dev/null +++ b/arch/um/sys-i386/shared/sysdep/faultinfo.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2004 Fujitsu Siemens Computers GmbH + * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com> + * Licensed under the GPL + */ + +#ifndef __FAULTINFO_I386_H +#define __FAULTINFO_I386_H + +/* this structure contains the full arch-specific faultinfo + * from the traps. 
+ * On i386, ptrace_faultinfo unfortunately doesn't provide + * all the info, since trap_no is missing. + * All common elements are defined at the same position in + * both structures, thus making it easy to copy the + * contents without knowledge about the structure elements. + */ +struct faultinfo { + int error_code; /* in ptrace_faultinfo misleadingly called is_write */ + unsigned long cr2; /* in ptrace_faultinfo called addr */ + int trap_no; /* missing in ptrace_faultinfo */ +}; + +#define FAULT_WRITE(fi) ((fi).error_code & 2) +#define FAULT_ADDRESS(fi) ((fi).cr2) + +#define PTRACE_FULL_FAULTINFO 0 + +#endif diff --git a/arch/um/sys-i386/shared/sysdep/host_ldt.h b/arch/um/sys-i386/shared/sysdep/host_ldt.h new file mode 100644 index 0000000..0953cc4 --- /dev/null +++ b/arch/um/sys-i386/shared/sysdep/host_ldt.h @@ -0,0 +1,34 @@ +#ifndef __ASM_HOST_LDT_I386_H +#define __ASM_HOST_LDT_I386_H + +#include <asm/ldt.h> + +/* + * macros stolen from include/asm-i386/desc.h + */ +#define LDT_entry_a(info) \ + ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) + +#define LDT_entry_b(info) \ + (((info)->base_addr & 0xff000000) | \ + (((info)->base_addr & 0x00ff0000) >> 16) | \ + ((info)->limit & 0xf0000) | \ + (((info)->read_exec_only ^ 1) << 9) | \ + ((info)->contents << 10) | \ + (((info)->seg_not_present ^ 1) << 15) | \ + ((info)->seg_32bit << 22) | \ + ((info)->limit_in_pages << 23) | \ + ((info)->useable << 20) | \ + 0x7000) + +#define LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->useable == 0 ) + +#endif diff --git a/arch/um/sys-i386/shared/sysdep/kernel-offsets.h b/arch/um/sys-i386/shared/sysdep/kernel-offsets.h new file mode 100644 index 0000000..5868526 --- /dev/null +++ b/arch/um/sys-i386/shared/sysdep/kernel-offsets.h @@ -0,0 +1,21 @@ +#include 
<linux/stddef.h> +#include <linux/sched.h> +#include <linux/elf.h> +#include <linux/crypto.h> +#include <asm/mman.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define STR(x) #x +#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " STR(val) " " #val: : ) + +#define BLANK() asm volatile("\n->" : : ) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + +void foo(void) +{ +#include <common-offsets.h> +} diff --git a/arch/um/sys-i386/shared/sysdep/ptrace.h b/arch/um/sys-i386/shared/sysdep/ptrace.h new file mode 100644 index 0000000..d50e62e --- /dev/null +++ b/arch/um/sys-i386/shared/sysdep/ptrace.h @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_I386_PTRACE_H +#define __SYSDEP_I386_PTRACE_H + +#include "user_constants.h" +#include "sysdep/faultinfo.h" + +#define MAX_REG_NR (UM_FRAME_SIZE / sizeof(unsigned long)) +#define MAX_REG_OFFSET (UM_FRAME_SIZE) + +static inline void update_debugregs(int seq) {} + +/* syscall emulation path in ptrace */ + +#ifndef PTRACE_SYSEMU +#define PTRACE_SYSEMU 31 +#endif + +void set_using_sysemu(int value); +int get_using_sysemu(void); +extern int sysemu_supported; + +#include "skas_ptregs.h" + +#define REGS_IP(r) ((r)[HOST_IP]) +#define REGS_SP(r) ((r)[HOST_SP]) +#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS]) +#define REGS_EAX(r) ((r)[HOST_EAX]) +#define REGS_EBX(r) ((r)[HOST_EBX]) +#define REGS_ECX(r) ((r)[HOST_ECX]) +#define REGS_EDX(r) ((r)[HOST_EDX]) +#define REGS_ESI(r) ((r)[HOST_ESI]) +#define REGS_EDI(r) ((r)[HOST_EDI]) +#define REGS_EBP(r) ((r)[HOST_EBP]) +#define REGS_CS(r) ((r)[HOST_CS]) +#define REGS_SS(r) ((r)[HOST_SS]) +#define REGS_DS(r) ((r)[HOST_DS]) +#define REGS_ES(r) ((r)[HOST_ES]) +#define REGS_FS(r) ((r)[HOST_FS]) +#define REGS_GS(r) ((r)[HOST_GS]) + +#define REGS_SET_SYSCALL_RETURN(r, res) REGS_EAX(r) = (res) + +#define REGS_RESTART_SYSCALL(r) 
IP_RESTART_SYSCALL(REGS_IP(r)) + +#ifndef PTRACE_SYSEMU_SINGLESTEP +#define PTRACE_SYSEMU_SINGLESTEP 32 +#endif + +struct uml_pt_regs { + unsigned long gp[MAX_REG_NR]; + struct faultinfo faultinfo; + long syscall; + int is_user; +}; + +#define EMPTY_UML_PT_REGS { } + +#define UPT_IP(r) REGS_IP((r)->gp) +#define UPT_SP(r) REGS_SP((r)->gp) +#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp) +#define UPT_EAX(r) REGS_EAX((r)->gp) +#define UPT_EBX(r) REGS_EBX((r)->gp) +#define UPT_ECX(r) REGS_ECX((r)->gp) +#define UPT_EDX(r) REGS_EDX((r)->gp) +#define UPT_ESI(r) REGS_ESI((r)->gp) +#define UPT_EDI(r) REGS_EDI((r)->gp) +#define UPT_EBP(r) REGS_EBP((r)->gp) +#define UPT_ORIG_EAX(r) ((r)->syscall) +#define UPT_CS(r) REGS_CS((r)->gp) +#define UPT_SS(r) REGS_SS((r)->gp) +#define UPT_DS(r) REGS_DS((r)->gp) +#define UPT_ES(r) REGS_ES((r)->gp) +#define UPT_FS(r) REGS_FS((r)->gp) +#define UPT_GS(r) REGS_GS((r)->gp) + +#define UPT_SYSCALL_ARG1(r) UPT_EBX(r) +#define UPT_SYSCALL_ARG2(r) UPT_ECX(r) +#define UPT_SYSCALL_ARG3(r) UPT_EDX(r) +#define UPT_SYSCALL_ARG4(r) UPT_ESI(r) +#define UPT_SYSCALL_ARG5(r) UPT_EDI(r) +#define UPT_SYSCALL_ARG6(r) UPT_EBP(r) + +extern int user_context(unsigned long sp); + +#define UPT_IS_USER(r) ((r)->is_user) + +struct syscall_args { + unsigned long args[6]; +}; + +#define SYSCALL_ARGS(r) ((struct syscall_args) \ + { .args = { UPT_SYSCALL_ARG1(r), \ + UPT_SYSCALL_ARG2(r), \ + UPT_SYSCALL_ARG3(r), \ + UPT_SYSCALL_ARG4(r), \ + UPT_SYSCALL_ARG5(r), \ + UPT_SYSCALL_ARG6(r) } } ) + +#define UPT_REG(regs, reg) \ + ({ unsigned long val; \ + switch(reg){ \ + case EIP: val = UPT_IP(regs); break; \ + case UESP: val = UPT_SP(regs); break; \ + case EAX: val = UPT_EAX(regs); break; \ + case EBX: val = UPT_EBX(regs); break; \ + case ECX: val = UPT_ECX(regs); break; \ + case EDX: val = UPT_EDX(regs); break; \ + case ESI: val = UPT_ESI(regs); break; \ + case EDI: val = UPT_EDI(regs); break; \ + case EBP: val = UPT_EBP(regs); break; \ + case ORIG_EAX: val = 
UPT_ORIG_EAX(regs); break; \ + case CS: val = UPT_CS(regs); break; \ + case SS: val = UPT_SS(regs); break; \ + case DS: val = UPT_DS(regs); break; \ + case ES: val = UPT_ES(regs); break; \ + case FS: val = UPT_FS(regs); break; \ + case GS: val = UPT_GS(regs); break; \ + case EFL: val = UPT_EFLAGS(regs); break; \ + default : \ + panic("Bad register in UPT_REG : %d\n", reg); \ + val = -1; \ + } \ + val; \ + }) + +#define UPT_SET(regs, reg, val) \ + do { \ + switch(reg){ \ + case EIP: UPT_IP(regs) = val; break; \ + case UESP: UPT_SP(regs) = val; break; \ + case EAX: UPT_EAX(regs) = val; break; \ + case EBX: UPT_EBX(regs) = val; break; \ + case ECX: UPT_ECX(regs) = val; break; \ + case EDX: UPT_EDX(regs) = val; break; \ + case ESI: UPT_ESI(regs) = val; break; \ + case EDI: UPT_EDI(regs) = val; break; \ + case EBP: UPT_EBP(regs) = val; break; \ + case ORIG_EAX: UPT_ORIG_EAX(regs) = val; break; \ + case CS: UPT_CS(regs) = val; break; \ + case SS: UPT_SS(regs) = val; break; \ + case DS: UPT_DS(regs) = val; break; \ + case ES: UPT_ES(regs) = val; break; \ + case FS: UPT_FS(regs) = val; break; \ + case GS: UPT_GS(regs) = val; break; \ + case EFL: UPT_EFLAGS(regs) = val; break; \ + default : \ + panic("Bad register in UPT_SET : %d\n", reg); \ + break; \ + } \ + } while (0) + +#define UPT_SET_SYSCALL_RETURN(r, res) \ + REGS_SET_SYSCALL_RETURN((r)->regs, (res)) + +#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp) + +#define UPT_ORIG_SYSCALL(r) UPT_EAX(r) +#define UPT_SYSCALL_NR(r) UPT_ORIG_EAX(r) +#define UPT_SYSCALL_RET(r) UPT_EAX(r) + +#define UPT_FAULTINFO(r) (&(r)->faultinfo) + +extern void arch_init_registers(int pid); + +#endif diff --git a/arch/um/sys-i386/shared/sysdep/ptrace_user.h b/arch/um/sys-i386/shared/sysdep/ptrace_user.h new file mode 100644 index 0000000..ef56247 --- /dev/null +++ b/arch/um/sys-i386/shared/sysdep/ptrace_user.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef 
__SYSDEP_I386_PTRACE_USER_H__ +#define __SYSDEP_I386_PTRACE_USER_H__ + +#include <sys/ptrace.h> +#include <linux/ptrace.h> +#include <asm/ptrace.h> +#include "user_constants.h" + +#define PT_OFFSET(r) ((r) * sizeof(long)) + +#define PT_SYSCALL_NR(regs) ((regs)[ORIG_EAX]) +#define PT_SYSCALL_NR_OFFSET PT_OFFSET(ORIG_EAX) + +#define PT_SYSCALL_ARG1_OFFSET PT_OFFSET(EBX) +#define PT_SYSCALL_ARG2_OFFSET PT_OFFSET(ECX) +#define PT_SYSCALL_ARG3_OFFSET PT_OFFSET(EDX) +#define PT_SYSCALL_ARG4_OFFSET PT_OFFSET(ESI) +#define PT_SYSCALL_ARG5_OFFSET PT_OFFSET(EDI) +#define PT_SYSCALL_ARG6_OFFSET PT_OFFSET(EBP) + +#define PT_SYSCALL_RET_OFFSET PT_OFFSET(EAX) + +#define REGS_SYSCALL_NR EAX /* This is used before a system call */ +#define REGS_SYSCALL_ARG1 EBX +#define REGS_SYSCALL_ARG2 ECX +#define REGS_SYSCALL_ARG3 EDX +#define REGS_SYSCALL_ARG4 ESI +#define REGS_SYSCALL_ARG5 EDI +#define REGS_SYSCALL_ARG6 EBP + +#define REGS_IP_INDEX EIP +#define REGS_SP_INDEX UESP + +#define PT_IP_OFFSET PT_OFFSET(EIP) +#define PT_IP(regs) ((regs)[EIP]) +#define PT_SP_OFFSET PT_OFFSET(UESP) +#define PT_SP(regs) ((regs)[UESP]) + +#define FP_SIZE ((HOST_FPX_SIZE > HOST_FP_SIZE) ? 
HOST_FPX_SIZE : HOST_FP_SIZE) + +#ifndef FRAME_SIZE +#define FRAME_SIZE (17) +#endif + +#endif diff --git a/arch/um/sys-i386/shared/sysdep/sc.h b/arch/um/sys-i386/shared/sysdep/sc.h new file mode 100644 index 0000000..c57d178 --- /dev/null +++ b/arch/um/sys-i386/shared/sysdep/sc.h @@ -0,0 +1,44 @@ +#ifndef __SYSDEP_I386_SC_H +#define __SYSDEP_I386_SC_H + +#include <user_constants.h> + +#define SC_OFFSET(sc, field) \ + *((unsigned long *) &(((char *) (sc))[HOST_##field])) +#define SC_FP_OFFSET(sc, field) \ + *((unsigned long *) &(((char *) (SC_FPSTATE(sc)))[HOST_##field])) +#define SC_FP_OFFSET_PTR(sc, field, type) \ + ((type *) &(((char *) (SC_FPSTATE(sc)))[HOST_##field])) + +#define SC_IP(sc) SC_OFFSET(sc, SC_IP) +#define SC_SP(sc) SC_OFFSET(sc, SC_SP) +#define SC_FS(sc) SC_OFFSET(sc, SC_FS) +#define SC_GS(sc) SC_OFFSET(sc, SC_GS) +#define SC_DS(sc) SC_OFFSET(sc, SC_DS) +#define SC_ES(sc) SC_OFFSET(sc, SC_ES) +#define SC_SS(sc) SC_OFFSET(sc, SC_SS) +#define SC_CS(sc) SC_OFFSET(sc, SC_CS) +#define SC_EFLAGS(sc) SC_OFFSET(sc, SC_EFLAGS) +#define SC_EAX(sc) SC_OFFSET(sc, SC_EAX) +#define SC_EBX(sc) SC_OFFSET(sc, SC_EBX) +#define SC_ECX(sc) SC_OFFSET(sc, SC_ECX) +#define SC_EDX(sc) SC_OFFSET(sc, SC_EDX) +#define SC_EDI(sc) SC_OFFSET(sc, SC_EDI) +#define SC_ESI(sc) SC_OFFSET(sc, SC_ESI) +#define SC_EBP(sc) SC_OFFSET(sc, SC_EBP) +#define SC_TRAPNO(sc) SC_OFFSET(sc, SC_TRAPNO) +#define SC_ERR(sc) SC_OFFSET(sc, SC_ERR) +#define SC_CR2(sc) SC_OFFSET(sc, SC_CR2) +#define SC_FPSTATE(sc) SC_OFFSET(sc, SC_FPSTATE) +#define SC_SIGMASK(sc) SC_OFFSET(sc, SC_SIGMASK) +#define SC_FP_CW(sc) SC_FP_OFFSET(sc, SC_FP_CW) +#define SC_FP_SW(sc) SC_FP_OFFSET(sc, SC_FP_SW) +#define SC_FP_TAG(sc) SC_FP_OFFSET(sc, SC_FP_TAG) +#define SC_FP_IPOFF(sc) SC_FP_OFFSET(sc, SC_FP_IPOFF) +#define SC_FP_CSSEL(sc) SC_FP_OFFSET(sc, SC_FP_CSSEL) +#define SC_FP_DATAOFF(sc) SC_FP_OFFSET(sc, SC_FP_DATAOFF) +#define SC_FP_DATASEL(sc) SC_FP_OFFSET(sc, SC_FP_DATASEL) +#define SC_FP_ST(sc) SC_FP_OFFSET_PTR(sc, 
SC_FP_ST, struct _fpstate) +#define SC_FXSR_ENV(sc) SC_FP_OFFSET_PTR(sc, SC_FXSR_ENV, void) + +#endif diff --git a/arch/um/sys-i386/shared/sysdep/sigcontext.h b/arch/um/sys-i386/shared/sysdep/sigcontext.h new file mode 100644 index 0000000..f583c87 --- /dev/null +++ b/arch/um/sys-i386/shared/sysdep/sigcontext.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __SYS_SIGCONTEXT_I386_H +#define __SYS_SIGCONTEXT_I386_H + +#include "sysdep/sc.h" + +#define IP_RESTART_SYSCALL(ip) ((ip) -= 2) + +#define GET_FAULTINFO_FROM_SC(fi, sc) \ + { \ + (fi).cr2 = SC_CR2(sc); \ + (fi).error_code = SC_ERR(sc); \ + (fi).trap_no = SC_TRAPNO(sc); \ + } + +/* This is Page Fault */ +#define SEGV_IS_FIXABLE(fi) ((fi)->trap_no == 14) + +/* SKAS3 has no trap_no on i386, but get_skas_faultinfo() sets it to 0. */ +#define SEGV_MAYBE_FIXABLE(fi) ((fi)->trap_no == 0 && ptrace_faultinfo) + +#endif diff --git a/arch/um/sys-i386/shared/sysdep/skas_ptrace.h b/arch/um/sys-i386/shared/sysdep/skas_ptrace.h new file mode 100644 index 0000000..e27b8a7 --- /dev/null +++ b/arch/um/sys-i386/shared/sysdep/skas_ptrace.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_I386_SKAS_PTRACE_H +#define __SYSDEP_I386_SKAS_PTRACE_H + +struct ptrace_faultinfo { + int is_write; + unsigned long addr; +}; + +struct ptrace_ldt { + int func; + void *ptr; + unsigned long bytecount; +}; + +#define PTRACE_LDT 54 + +#endif diff --git a/arch/um/sys-i386/shared/sysdep/stub.h b/arch/um/sys-i386/shared/sysdep/stub.h new file mode 100644 index 0000000..977dedd --- /dev/null +++ b/arch/um/sys-i386/shared/sysdep/stub.h @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_STUB_H +#define __SYSDEP_STUB_H + +#include <sys/mman.h> +#include <asm/ptrace.h> +#include <asm/unistd.h> +#include 
"as-layout.h" +#include "stub-data.h" +#include "kern_constants.h" + +extern void stub_segv_handler(int sig); +extern void stub_clone_handler(void); + +#define STUB_SYSCALL_RET EAX +#define STUB_MMAP_NR __NR_mmap2 +#define MMAP_OFFSET(o) ((o) >> UM_KERN_PAGE_SHIFT) + +static inline long stub_syscall0(long syscall) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall)); + + return ret; +} + +static inline long stub_syscall1(long syscall, long arg1) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1)); + + return ret; +} + +static inline long stub_syscall2(long syscall, long arg1, long arg2) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1), + "c" (arg2)); + + return ret; +} + +static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1), + "c" (arg2), "d" (arg3)); + + return ret; +} + +static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3, + long arg4) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1), + "c" (arg2), "d" (arg3), "S" (arg4)); + + return ret; +} + +static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3, + long arg4, long arg5) +{ + long ret; + + __asm__ volatile ("int $0x80" : "=a" (ret) : "0" (syscall), "b" (arg1), + "c" (arg2), "d" (arg3), "S" (arg4), "D" (arg5)); + + return ret; +} + +static inline void trap_myself(void) +{ + __asm("int3"); +} + +static inline void remap_stack(int fd, unsigned long offset) +{ + __asm__ volatile ("movl %%eax,%%ebp ; movl %0,%%eax ; int $0x80 ;" + "movl %7, %%ebx ; movl %%eax, (%%ebx)" + : : "g" (STUB_MMAP_NR), "b" (STUB_DATA), + "c" (UM_KERN_PAGE_SIZE), + "d" (PROT_READ | PROT_WRITE), + "S" (MAP_FIXED | MAP_SHARED), "D" (fd), + "a" (offset), + "i" (&((struct stub_data *) STUB_DATA)->err) + : "memory"); +} + +#endif diff 
--git a/arch/um/sys-i386/shared/sysdep/syscalls.h b/arch/um/sys-i386/shared/sysdep/syscalls.h new file mode 100644 index 0000000..9056981 --- /dev/null +++ b/arch/um/sys-i386/shared/sysdep/syscalls.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2000 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "asm/unistd.h" +#include "sysdep/ptrace.h" + +typedef long syscall_handler_t(struct pt_regs); + +/* Not declared on x86, incompatible declarations on x86_64, so these have + * to go here rather than in sys_call_table.c + */ +extern syscall_handler_t sys_rt_sigaction; + +extern syscall_handler_t old_mmap_i386; + +extern syscall_handler_t *sys_call_table[]; + +#define EXECUTE_SYSCALL(syscall, regs) \ + ((long (*)(struct syscall_args)) \ + (*sys_call_table[syscall]))(SYSCALL_ARGS(&regs->regs)) + +extern long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff); diff --git a/arch/um/sys-i386/shared/sysdep/system.h b/arch/um/sys-i386/shared/sysdep/system.h new file mode 100644 index 0000000..d1b93c4 --- /dev/null +++ b/arch/um/sys-i386/shared/sysdep/system.h @@ -0,0 +1,132 @@ +#ifndef _ASM_X86_SYSTEM_H_ +#define _ASM_X86_SYSTEM_H_ + +#include <asm/asm.h> +#include <asm/segment.h> +#include <asm/cpufeature.h> +#include <asm/cmpxchg.h> +#include <asm/nops.h> + +#include <linux/kernel.h> +#include <linux/irqflags.h> + +/* entries in ARCH_DLINFO: */ +#ifdef CONFIG_IA32_EMULATION +# define AT_VECTOR_SIZE_ARCH 2 +#else +# define AT_VECTOR_SIZE_ARCH 1 +#endif + +extern unsigned long arch_align_stack(unsigned long sp); + +void default_idle(void); + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + */ +#ifdef CONFIG_X86_32 +/* + * Some non-Intel clones support out of order store. wmb() ceases to be a + * nop for these.
+ */ +#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) +#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) +#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) +#else +#define mb() asm volatile("mfence":::"memory") +#define rmb() asm volatile("lfence":::"memory") +#define wmb() asm volatile("sfence" ::: "memory") +#endif + +/** + * read_barrier_depends - Flush all pending reads that subsequent reads + * depend on. + * + * No data-dependent reads from memory-like regions are ever reordered + * over this barrier. All reads preceding this primitive are guaranteed + * to access memory (but not necessarily other CPUs' caches) before any + * reads following this primitive that depend on the data returned by + * any of the preceding reads. This primitive is much lighter weight than + * rmb() on most CPUs, and is never heavier weight than + * rmb(). + * + * These ordering constraints are respected by both the local CPU + * and the compiler. + * + * Ordering is not guaranteed by anything other than these primitives, + * not even by data dependencies. See the documentation for + * memory_barrier() for examples and URLs to more information. + * + * For example, the following code would force ordering (the initial + * value of "a" is zero, "b" is one, and "p" is "&a"): + * + * <programlisting> + * CPU 0 CPU 1 + * + * b = 2; + * memory_barrier(); + * p = &b; q = p; + * read_barrier_depends(); + * d = *q; + * </programlisting> + * + * because the read of "*q" depends on the read of "p" and these + * two reads are separated by a read_barrier_depends().
However, + * the following code, with the same initial values for "a" and "b": + * + * <programlisting> + * CPU 0 CPU 1 + * + * a = 2; + * memory_barrier(); + * b = 3; y = b; + * read_barrier_depends(); + * x = a; + * </programlisting> + * + * does not enforce ordering, since there is no data dependency between + * the read of "a" and the read of "b". Therefore, on some CPUs, such + * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() + * in cases like this where there are no data dependencies. + **/ + +#define read_barrier_depends() do { } while (0) + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#ifdef CONFIG_X86_PPRO_FENCE +# define smp_rmb() rmb() +#else +# define smp_rmb() barrier() +#endif +#ifdef CONFIG_X86_OOSTORE +# define smp_wmb() wmb() +#else +# define smp_wmb() barrier() +#endif +#define smp_read_barrier_depends() read_barrier_depends() +#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() do { } while (0) +#define set_mb(var, value) do { var = value; barrier(); } while (0) +#endif + +/* + * Stop RDTSC speculation. This is needed when you need to use RDTSC + * (or get_cycles or vread that possibly accesses the TSC) in a defined + * code region. + * + * (Could use an alternative three way for this if there was one.) 
+ */ +static inline void rdtsc_barrier(void) +{ + alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); + alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); +} + +#endif diff --git a/arch/um/sys-i386/shared/sysdep/tls.h b/arch/um/sys-i386/shared/sysdep/tls.h new file mode 100644 index 0000000..3455075 --- /dev/null +++ b/arch/um/sys-i386/shared/sysdep/tls.h @@ -0,0 +1,32 @@ +#ifndef _SYSDEP_TLS_H +#define _SYSDEP_TLS_H + +# ifndef __KERNEL__ + +/* Change name to avoid conflicts with the original one from <asm/ldt.h>, which + * may be named user_desc (but in 2.4 and in header matching its API was named + * modify_ldt_ldt_s). */ + +typedef struct um_dup_user_desc { + unsigned int entry_number; + unsigned int base_addr; + unsigned int limit; + unsigned int seg_32bit:1; + unsigned int contents:2; + unsigned int read_exec_only:1; + unsigned int limit_in_pages:1; + unsigned int seg_not_present:1; + unsigned int useable:1; +} user_desc_t; + +# else /* __KERNEL__ */ + +# include <ldt.h> +typedef struct user_desc user_desc_t; + +# endif /* __KERNEL__ */ + +#define GDT_ENTRY_TLS_MIN_I386 6 +#define GDT_ENTRY_TLS_MIN_X86_64 12 + +#endif /* _SYSDEP_TLS_H */ diff --git a/arch/um/sys-i386/shared/sysdep/vm-flags.h b/arch/um/sys-i386/shared/sysdep/vm-flags.h new file mode 100644 index 0000000..e0d24c5 --- /dev/null +++ b/arch/um/sys-i386/shared/sysdep/vm-flags.h @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __VM_FLAGS_I386_H +#define __VM_FLAGS_I386_H + +#define VM_DATA_DEFAULT_FLAGS \ + (VM_READ | VM_WRITE | \ + ((current->personality & READ_IMPLIES_EXEC) ? 
VM_EXEC : 0 ) | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#endif diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c new file mode 100644 index 0000000..1296473 --- /dev/null +++ b/arch/um/sys-i386/signal.c @@ -0,0 +1,508 @@ +/* + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/ptrace.h> +#include <asm/unistd.h> +#include <asm/uaccess.h> +#include <asm/ucontext.h> +#include "frame_kern.h" +#include "skas.h" + +void copy_sc(struct uml_pt_regs *regs, void *from) +{ + struct sigcontext *sc = from; + + REGS_GS(regs->gp) = sc->gs; + REGS_FS(regs->gp) = sc->fs; + REGS_ES(regs->gp) = sc->es; + REGS_DS(regs->gp) = sc->ds; + REGS_EDI(regs->gp) = sc->di; + REGS_ESI(regs->gp) = sc->si; + REGS_EBP(regs->gp) = sc->bp; + REGS_SP(regs->gp) = sc->sp; + REGS_EBX(regs->gp) = sc->bx; + REGS_EDX(regs->gp) = sc->dx; + REGS_ECX(regs->gp) = sc->cx; + REGS_EAX(regs->gp) = sc->ax; + REGS_IP(regs->gp) = sc->ip; + REGS_CS(regs->gp) = sc->cs; + REGS_EFLAGS(regs->gp) = sc->flags; + REGS_SS(regs->gp) = sc->ss; +} + +/* + * FPU tag word conversions. + */ + +static inline unsigned short twd_i387_to_fxsr(unsigned short twd) +{ + unsigned int tmp; /* to avoid 16 bit prefixes in the code */ + + /* Transform each pair of bits into 01 (valid) or 00 (empty) */ + tmp = ~twd; + tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ + /* and move the valid bits to the lower byte. 
*/ + tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ + tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ + tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ + return tmp; +} + +static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave) +{ + struct _fpxreg *st = NULL; + unsigned long twd = (unsigned long) fxsave->twd; + unsigned long tag; + unsigned long ret = 0xffff0000; + int i; + +#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16); + + for (i = 0; i < 8; i++) { + if (twd & 0x1) { + st = (struct _fpxreg *) FPREG_ADDR(fxsave, i); + + switch (st->exponent & 0x7fff) { + case 0x7fff: + tag = 2; /* Special */ + break; + case 0x0000: + if ( !st->significand[0] && + !st->significand[1] && + !st->significand[2] && + !st->significand[3] ) { + tag = 1; /* Zero */ + } else { + tag = 2; /* Special */ + } + break; + default: + if (st->significand[3] & 0x8000) { + tag = 0; /* Valid */ + } else { + tag = 2; /* Special */ + } + break; + } + } else { + tag = 3; /* Empty */ + } + ret |= (tag << (2 * i)); + twd = twd >> 1; + } + return ret; +} + +static int convert_fxsr_to_user(struct _fpstate __user *buf, + struct user_fxsr_struct *fxsave) +{ + unsigned long env[7]; + struct _fpreg __user *to; + struct _fpxreg *from; + int i; + + env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul; + env[1] = (unsigned long)fxsave->swd | 0xffff0000ul; + env[2] = twd_fxsr_to_i387(fxsave); + env[3] = fxsave->fip; + env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16); + env[5] = fxsave->foo; + env[6] = fxsave->fos; + + if (__copy_to_user(buf, env, 7 * sizeof(unsigned long))) + return 1; + + to = &buf->_st[0]; + from = (struct _fpxreg *) &fxsave->st_space[0]; + for (i = 0; i < 8; i++, to++, from++) { + unsigned long __user *t = (unsigned long __user *)to; + unsigned long *f = (unsigned long *)from; + + if (__put_user(*f, t) || + __put_user(*(f + 1), t + 1) || + __put_user(from->exponent, &to->exponent)) + return 1; + } + return 0; +} + +static 
int convert_fxsr_from_user(struct user_fxsr_struct *fxsave, + struct _fpstate __user *buf) +{ + unsigned long env[7]; + struct _fpxreg *to; + struct _fpreg __user *from; + int i; + + if (copy_from_user( env, buf, 7 * sizeof(long))) + return 1; + + fxsave->cwd = (unsigned short)(env[0] & 0xffff); + fxsave->swd = (unsigned short)(env[1] & 0xffff); + fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff)); + fxsave->fip = env[3]; + fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16); + fxsave->fcs = (env[4] & 0xffff); + fxsave->foo = env[5]; + fxsave->fos = env[6]; + + to = (struct _fpxreg *) &fxsave->st_space[0]; + from = &buf->_st[0]; + for (i = 0; i < 8; i++, to++, from++) { + unsigned long *t = (unsigned long *)to; + unsigned long __user *f = (unsigned long __user *)from; + + if (__get_user(*t, f) || + __get_user(*(t + 1), f + 1) || + __get_user(to->exponent, &from->exponent)) + return 1; + } + return 0; +} + +extern int have_fpx_regs; + +/* + * Restore register state from the user-space sigcontext at from: copy it + * in, apply it to regs with copy_sc(), then read the FP state it points to + * (FXSR layout when have_fpx_regs is set, plain i387 layout otherwise) and + * hand it to restore_fpx_registers()/restore_fp_registers() for the pid in + * userspace_pid[]. Returns 0 on success, nonzero on any failure. + */ +static int copy_sc_from_user(struct pt_regs *regs, + struct sigcontext __user *from) +{ + struct sigcontext sc; + int err, pid; + + err = copy_from_user(&sc, from, sizeof(sc)); + if (err) + return err; + + pid = userspace_pid[current_thread_info()->cpu]; + copy_sc(&regs->regs, &sc); + if (have_fpx_regs) { + struct user_fxsr_struct fpx; + + err = copy_from_user(&fpx, + &((struct _fpstate __user *)sc.fpstate)->_fxsr_env[0], + sizeof(struct user_fxsr_struct)); + if (err) + return 1; + + err = convert_fxsr_from_user(&fpx, sc.fpstate); + if (err) + return 1; + + err = restore_fpx_registers(pid, (unsigned long *) &fpx); + if (err < 0) { + printk(KERN_ERR "copy_sc_from_user - " + "restore_fpx_registers failed, errno = %d\n", + -err); + return 1; + } + } + else { + struct user_i387_struct fp; + + err = copy_from_user(&fp, sc.fpstate, + sizeof(struct user_i387_struct)); + if (err) + return 1; + + err = restore_fp_registers(pid, (unsigned long *) &fp); + if (err < 0) { + printk(KERN_ERR "copy_sc_from_user - " + 
"restore_fp_registers failed, errno = %d\n", + -err); + return 1; + } + } + + return 0; +} + +/* + * Build a sigcontext from regs and the current task's fault info and write + * it to the user-space address to, together with the FP state at to_fp (or + * directly after the sigcontext when to_fp is NULL). sp is the value stored + * as the sigcontext's stack pointer. Returns 0 on success, nonzero on any + * failure. + */ +static int copy_sc_to_user(struct sigcontext __user *to, + struct _fpstate __user *to_fp, struct pt_regs *regs, + unsigned long sp) +{ + struct sigcontext sc; + struct faultinfo * fi = &current->thread.arch.faultinfo; + int err, pid; + + sc.gs = REGS_GS(regs->regs.gp); + sc.fs = REGS_FS(regs->regs.gp); + sc.es = REGS_ES(regs->regs.gp); + sc.ds = REGS_DS(regs->regs.gp); + sc.di = REGS_EDI(regs->regs.gp); + sc.si = REGS_ESI(regs->regs.gp); + sc.bp = REGS_EBP(regs->regs.gp); + sc.sp = sp; + sc.bx = REGS_EBX(regs->regs.gp); + sc.dx = REGS_EDX(regs->regs.gp); + sc.cx = REGS_ECX(regs->regs.gp); + sc.ax = REGS_EAX(regs->regs.gp); + sc.ip = REGS_IP(regs->regs.gp); + sc.cs = REGS_CS(regs->regs.gp); + sc.flags = REGS_EFLAGS(regs->regs.gp); + sc.sp_at_signal = regs->regs.gp[UESP]; + sc.ss = regs->regs.gp[SS]; + sc.cr2 = fi->cr2; + sc.err = fi->error_code; + sc.trapno = fi->trap_no; + + to_fp = (to_fp ? to_fp : (struct _fpstate __user *) (to + 1)); + sc.fpstate = to_fp; + + pid = userspace_pid[current_thread_info()->cpu]; + if (have_fpx_regs) { + struct user_fxsr_struct fpx; + + err = save_fpx_registers(pid, (unsigned long *) &fpx); + if (err < 0){ + printk(KERN_ERR "copy_sc_to_user - save_fpx_registers " + "failed, errno = %d\n", err); + return 1; + } + + err = convert_fxsr_to_user(to_fp, &fpx); + if (err) + return 1; + + err |= __put_user(fpx.swd, &to_fp->status); + err |= __put_user(X86_FXSR_MAGIC, &to_fp->magic); + if (err) + return 1; + + if (copy_to_user(&to_fp->_fxsr_env[0], &fpx, + sizeof(struct user_fxsr_struct))) + return 1; + } + else { + struct user_i387_struct fp; + + err = save_fp_registers(pid, (unsigned long *) &fp); + /* On failure fp was never filled in; do not copy it to user space. */ + if (err < 0) { + printk(KERN_ERR "copy_sc_to_user - save_fp_registers " + "failed, errno = %d\n", err); + return 1; + } + + if (copy_to_user(to_fp, &fp, sizeof(struct user_i387_struct))) + return 1; + } + + return copy_to_user(to, &sc, sizeof(sc)); +} + +static int copy_ucontext_to_user(struct ucontext __user *uc, + struct _fpstate __user *fp, sigset_t *set, + unsigned long sp) +{ 
+ int err = 0; + + err |= put_user(current->sas_ss_sp, &uc->uc_stack.ss_sp); + err |= put_user(sas_ss_flags(sp), &uc->uc_stack.ss_flags); + err |= put_user(current->sas_ss_size, &uc->uc_stack.ss_size); + err |= copy_sc_to_user(&uc->uc_mcontext, fp, ¤t->thread.regs, sp); + err |= copy_to_user(&uc->uc_sigmask, set, sizeof(*set)); + return err; +} + +struct sigframe +{ + char __user *pretcode; + int sig; + struct sigcontext sc; + struct _fpstate fpstate; + unsigned long extramask[_NSIG_WORDS-1]; + char retcode[8]; +}; + +struct rt_sigframe +{ + char __user *pretcode; + int sig; + struct siginfo __user *pinfo; + void __user *puc; + struct siginfo info; + struct ucontext uc; + struct _fpstate fpstate; + char retcode[8]; +}; + +int setup_signal_stack_sc(unsigned long stack_top, int sig, + struct k_sigaction *ka, struct pt_regs *regs, + sigset_t *mask) +{ + struct sigframe __user *frame; + void __user *restorer; + unsigned long save_sp = PT_REGS_SP(regs); + int err = 0; + + /* This is the same calculation as i386 - ((sp + 4) & 15) == 0 */ + stack_top = ((stack_top + 4) & -16UL) - 4; + frame = (struct sigframe __user *) stack_top - 1; + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + return 1; + + restorer = frame->retcode; + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + + /* Update SP now because the page fault handler refuses to extend + * the stack if the faulting address is too far below the current + * SP, which frame now certainly is. If there's an error, the original + * value is restored on the way out. + * When writing the sigcontext to the stack, we have to write the + * original value, so that's passed to copy_sc_to_user, which does + * the right thing with it. 
+ */ + PT_REGS_SP(regs) = (unsigned long) frame; + + err |= __put_user(restorer, &frame->pretcode); + err |= __put_user(sig, &frame->sig); + err |= copy_sc_to_user(&frame->sc, NULL, regs, save_sp); + err |= __put_user(mask->sig[0], &frame->sc.oldmask); + if (_NSIG_WORDS > 1) + err |= __copy_to_user(&frame->extramask, &mask->sig[1], + sizeof(frame->extramask)); + + /* + * This is popl %eax ; movl $,%eax ; int $0x80 + * + * WE DO NOT USE IT ANY MORE! It's only left here for historical + * reasons and because gdb uses it as a signature to notice + * signal handler stack frames. + */ + err |= __put_user(0xb858, (short __user *)(frame->retcode+0)); + err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2)); + err |= __put_user(0x80cd, (short __user *)(frame->retcode+6)); + + if (err) + goto err; + + PT_REGS_SP(regs) = (unsigned long) frame; + PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler; + PT_REGS_EAX(regs) = (unsigned long) sig; + PT_REGS_EDX(regs) = (unsigned long) 0; + PT_REGS_ECX(regs) = (unsigned long) 0; + + if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + ptrace_notify(SIGTRAP); + return 0; + +err: + PT_REGS_SP(regs) = save_sp; + return err; +} + +int setup_signal_stack_si(unsigned long stack_top, int sig, + struct k_sigaction *ka, struct pt_regs *regs, + siginfo_t *info, sigset_t *mask) +{ + struct rt_sigframe __user *frame; + void __user *restorer; + unsigned long save_sp = PT_REGS_SP(regs); + int err = 0; + + stack_top &= -8UL; + frame = (struct rt_sigframe __user *) stack_top - 1; + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + return 1; + + restorer = frame->retcode; + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + + /* See comment above about why this is here */ + PT_REGS_SP(regs) = (unsigned long) frame; + + err |= __put_user(restorer, &frame->pretcode); + err |= __put_user(sig, &frame->sig); + err |= __put_user(&frame->info, &frame->pinfo); + err |= __put_user(&frame->uc, 
&frame->puc); + err |= copy_siginfo_to_user(&frame->info, info); + err |= copy_ucontext_to_user(&frame->uc, &frame->fpstate, mask, + save_sp); + + /* + * This is movl $,%eax ; int $0x80 + * + * WE DO NOT USE IT ANY MORE! It's only left here for historical + * reasons and because gdb uses it as a signature to notice + * signal handler stack frames. + */ + err |= __put_user(0xb8, (char __user *)(frame->retcode+0)); + err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1)); + err |= __put_user(0x80cd, (short __user *)(frame->retcode+5)); + + if (err) + goto err; + + PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler; + PT_REGS_EAX(regs) = (unsigned long) sig; + PT_REGS_EDX(regs) = (unsigned long) &frame->info; + PT_REGS_ECX(regs) = (unsigned long) &frame->uc; + + if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + ptrace_notify(SIGTRAP); + return 0; + +err: + PT_REGS_SP(regs) = save_sp; + return err; +} + +/* + * Return from a signal handler that was entered through a struct sigframe. + * The frame is located 8 bytes below the saved user SP. The blocked-signal + * mask is reloaded from the frame (after sigdelsetmask(&set, ~_BLOCKABLE)) + * and the registers are restored from its sigcontext. If the frame cannot + * be read, SIGSEGV is forced on the current task and 0 is returned. + */ +long sys_sigreturn(struct pt_regs regs) +{ + unsigned long sp = PT_REGS_SP(&current->thread.regs); + struct sigframe __user *frame = (struct sigframe __user *)(sp - 8); + sigset_t set; + struct sigcontext __user *sc = &frame->sc; + unsigned long __user *oldmask = &sc->oldmask; + unsigned long __user *extramask = frame->extramask; + int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); + + if (copy_from_user(&set.sig[0], oldmask, sizeof(set.sig[0])) || + copy_from_user(&set.sig[1], extramask, sig_size)) + goto segfault; + + sigdelsetmask(&set, ~_BLOCKABLE); + + spin_lock_irq(&current->sighand->siglock); + current->blocked = set; + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); + + if (copy_sc_from_user(&current->thread.regs, sc)) + goto segfault; + + /* Avoid ERESTART handling */ + PT_REGS_SYSCALL_NR(&current->thread.regs) = -1; + return PT_REGS_SYSCALL_RET(&current->thread.regs); + + segfault: + force_sig(SIGSEGV, current); + return 0; +} + +long sys_rt_sigreturn(struct pt_regs regs) +{ + unsigned long sp = PT_REGS_SP(&current->thread.regs); + 
struct rt_sigframe __user *frame = + (struct rt_sigframe __user *) (sp - 4); + sigset_t set; + struct ucontext __user *uc = &frame->uc; + int sig_size = _NSIG_WORDS * sizeof(unsigned long); + + /* Reload the signal mask stored in the frame's ucontext. */ + if (copy_from_user(&set, &uc->uc_sigmask, sig_size)) + goto segfault; + + sigdelsetmask(&set, ~_BLOCKABLE); + + spin_lock_irq(&current->sighand->siglock); + current->blocked = set; + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); + + /* Restore the registers from the ucontext's machine context. */ + if (copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext)) + goto segfault; + + /* Avoid ERESTART handling */ + PT_REGS_SYSCALL_NR(&current->thread.regs) = -1; + return PT_REGS_SYSCALL_RET(&current->thread.regs); + + segfault: + force_sig(SIGSEGV, current); + return 0; +} diff --git a/arch/um/sys-i386/stub.S b/arch/um/sys-i386/stub.S new file mode 100644 index 0000000..c41b04b --- /dev/null +++ b/arch/um/sys-i386/stub.S @@ -0,0 +1,51 @@ +#include "as-layout.h" + + .globl syscall_stub +.section .__syscall_stub, "x" + + .globl batch_syscall_stub +batch_syscall_stub: + /* load pointer to first operation */ + mov $(STUB_DATA+8), %esp + +again: + /* load length of additional data */ + mov 0x0(%esp), %eax + + /* if(length == 0) : end of list */ + /* write possible 0 to header */ + mov %eax, STUB_DATA+4 + cmpl $0, %eax + jz done + + /* save current pointer */ + mov %esp, STUB_DATA+4 + + /* skip additional data */ + add %eax, %esp + + /* load syscall-# */ + pop %eax + + /* load syscall params */ + pop %ebx + pop %ecx + pop %edx + pop %esi + pop %edi + pop %ebp + + /* execute syscall */ + int $0x80 + + /* check return value */ + pop %ebx + cmp %ebx, %eax + je again + +done: + /* save return value */ + mov %eax, STUB_DATA + + /* stop */ + int3 diff --git a/arch/um/sys-i386/stub_segv.c b/arch/um/sys-i386/stub_segv.c new file mode 100644 index 0000000..28ccf73 --- /dev/null +++ b/arch/um/sys-i386/stub_segv.c @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include "sysdep/stub.h" 
+#include "sysdep/sigcontext.h" + +void __attribute__ ((__section__ (".__syscall_stub"))) +stub_segv_handler(int sig) +{ + struct sigcontext *sc = (struct sigcontext *) (&sig + 1); + + GET_FAULTINFO_FROM_SC(*((struct faultinfo *) STUB_DATA), sc); + + trap_myself(); +} diff --git a/arch/um/sys-i386/sys_call_table.S b/arch/um/sys-i386/sys_call_table.S new file mode 100644 index 0000000..00e5f52 --- /dev/null +++ b/arch/um/sys-i386/sys_call_table.S @@ -0,0 +1,17 @@ +#include <linux/linkage.h> +/* Steal i386 syscall table for our purposes, but with some slight changes.*/ + +#define sys_iopl sys_ni_syscall +#define sys_ioperm sys_ni_syscall + +#define sys_vm86old sys_ni_syscall +#define sys_vm86 sys_ni_syscall + +#define old_mmap old_mmap_i386 + +.section .rodata,"a" + +#include "../../x86/kernel/syscall_table_32.S" + +ENTRY(syscall_table_size) +.long .-sys_call_table diff --git a/arch/um/sys-i386/syscalls.c b/arch/um/sys-i386/syscalls.c new file mode 100644 index 0000000..857ca0b --- /dev/null +++ b/arch/um/sys-i386/syscalls.c @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include "linux/sched.h" +#include "linux/shm.h" +#include "linux/ipc.h" +#include "linux/syscalls.h" +#include "asm/mman.h" +#include "asm/uaccess.h" +#include "asm/unistd.h" + +/* + * Perform the select(nd, in, out, ex, tv) and mmap() system + * calls. Linux/i386 didn't use to be able to handle more than + * 4 system call parameters, so these system calls used a memory + * block for parameter passing.. 
+ */ + +struct mmap_arg_struct { + unsigned long addr; + unsigned long len; + unsigned long prot; + unsigned long flags; + unsigned long fd; + unsigned long offset; +}; + +extern int old_mmap(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long offset); + +/* + * mmap() entry point taking its six arguments from a user-space + * mmap_arg_struct. Returns -EFAULT if the block cannot be read, otherwise + * the result of old_mmap(). + */ +long old_mmap_i386(struct mmap_arg_struct __user *arg) +{ + struct mmap_arg_struct a; + int err = -EFAULT; + + if (copy_from_user(&a, arg, sizeof(a))) + goto out; + + err = old_mmap(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); + out: + return err; +} + +struct sel_arg_struct { + unsigned long n; + fd_set __user *inp; + fd_set __user *outp; + fd_set __user *exp; + struct timeval __user *tvp; +}; + +/* + * select() entry point taking its five arguments from a user-space + * sel_arg_struct. Returns -EFAULT if the block cannot be read, otherwise + * the result of sys_select(). + */ +long old_select(struct sel_arg_struct __user *arg) +{ + struct sel_arg_struct a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + /* sys_select() does the appropriate kernel locking */ + return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); +} + +/* + * The prototype on i386 is: + * + * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr) + * + * and the "newtls" arg. on i386 is read by copy_thread directly from the + * register saved on the stack. + */ +long sys_clone(unsigned long clone_flags, unsigned long newsp, + int __user *parent_tid, void *newtls, int __user *child_tid) +{ + long ret; + + /* No child stack given: use the caller's current user stack pointer. */ + if (!newsp) + newsp = UPT_SP(&current->thread.regs.regs); + + current->thread.forking = 1; + ret = do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid, + child_tid); + current->thread.forking = 0; + return ret; +} + +/* + * sys_ipc() is the de-multiplexer for the SysV IPC calls.. + * + * This is really horribly ugly. 
+ */ +/* + * The low 16 bits of call select the operation and the high 16 bits carry + * a version number chosen by the caller. Returns the result of the + * selected IPC routine, or -ENOSYS for an unknown operation. + */ +long sys_ipc (uint call, int first, int second, + int third, void __user *ptr, long fifth) +{ + int version, ret; + + version = call >> 16; /* hack for backward compatibility */ + call &= 0xffff; + + switch (call) { + case SEMOP: + return sys_semtimedop(first, (struct sembuf __user *) ptr, + second, NULL); + case SEMTIMEDOP: + return sys_semtimedop(first, (struct sembuf __user *) ptr, + second, + (const struct timespec __user *) fifth); + case SEMGET: + return sys_semget (first, second, third); + case SEMCTL: { + union semun fourth; + if (!ptr) + return -EINVAL; + if (get_user(fourth.__pad, (void __user * __user *) ptr)) + return -EFAULT; + return sys_semctl (first, second, third, fourth); + } + + case MSGSND: + return sys_msgsnd (first, (struct msgbuf *) ptr, + second, third); + case MSGRCV: + switch (version) { + case 0: { + struct ipc_kludge tmp; + if (!ptr) + return -EINVAL; + + if (copy_from_user(&tmp, + (struct ipc_kludge *) ptr, + sizeof (tmp))) + return -EFAULT; + return sys_msgrcv (first, tmp.msgp, second, + tmp.msgtyp, third); + } + default: + /* + * The version comes straight from the caller's call + * argument, so it must never bring the kernel down: + * pass ptr and fifth directly to sys_msgrcv(). + */ + return sys_msgrcv (first, + (struct msgbuf *) ptr, + second, fifth, third); + } + case MSGGET: + return sys_msgget ((key_t) first, second); + case MSGCTL: + return sys_msgctl (first, second, (struct msqid_ds *) ptr); + + case SHMAT: + switch (version) { + default: { + ulong raddr; + ret = do_shmat (first, (char *) ptr, second, &raddr); + if (ret) + return ret; + return put_user (raddr, (ulong *) third); + } + case 1: /* iBCS2 emulator entry point */ + if (!segment_eq(get_fs(), get_ds())) + return -EINVAL; + return do_shmat (first, (char *) ptr, second, (ulong *) third); + } + case SHMDT: + return sys_shmdt ((char *)ptr); + case SHMGET: + return sys_shmget (first, second, third); + case SHMCTL: + return sys_shmctl (first, second, + (struct shmid_ds *) ptr); + default: + return -ENOSYS; + } +} + +long sys_sigaction(int sig, const struct old_sigaction __user *act, + struct 
old_sigaction __user *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + old_sigset_t mask; + if (!access_ok(VERIFY_READ, act, sizeof(*act)) || + __get_user(new_ka.sa.sa_handler, &act->sa_handler) || + __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + return -EFAULT; + __get_user(new_ka.sa.sa_flags, &act->sa_flags); + __get_user(mask, &act->sa_mask); + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || + __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || + __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + return -EFAULT; + __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + return ret; +} diff --git a/arch/um/sys-i386/sysrq.c b/arch/um/sys-i386/sysrq.c new file mode 100644 index 0000000..171b3e9 --- /dev/null +++ b/arch/um/sys-i386/sysrq.c @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include "linux/kernel.h" +#include "linux/smp.h" +#include "linux/sched.h" +#include "linux/kallsyms.h" +#include "asm/ptrace.h" +#include "sysrq.h" + +/* This is declared by <linux/sched.h> */ +void show_regs(struct pt_regs *regs) +{ + printk("\n"); + printk("EIP: %04lx:[<%08lx>] CPU: %d %s", + 0xffff & PT_REGS_CS(regs), PT_REGS_IP(regs), + smp_processor_id(), print_tainted()); + if (PT_REGS_CS(regs) & 3) + printk(" ESP: %04lx:%08lx", 0xffff & PT_REGS_SS(regs), + PT_REGS_SP(regs)); + printk(" EFLAGS: %08lx\n %s\n", PT_REGS_EFLAGS(regs), + print_tainted()); + printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", + PT_REGS_EAX(regs), PT_REGS_EBX(regs), + PT_REGS_ECX(regs), + PT_REGS_EDX(regs)); + printk("ESI: %08lx EDI: %08lx EBP: %08lx", + PT_REGS_ESI(regs), PT_REGS_EDI(regs), + PT_REGS_EBP(regs)); + printk(" DS: %04lx ES: %04lx\n", + 0xffff & PT_REGS_DS(regs), + 0xffff & 
PT_REGS_ES(regs)); + + /* The address of the local 'regs' argument is used as the stack start. */ + show_trace(NULL, (unsigned long *) &regs); +} + +/* + * Copied from i386. + * True when p lies strictly above tinfo and below tinfo + THREAD_SIZE - 3. + */ +static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) +{ + return p > (void *)tinfo && + p < (void *)tinfo + THREAD_SIZE - 3; +} + +/* + * Adapted from i386 (we also print the address we read from). + * With CONFIG_FRAME_POINTER the chain of saved frame pointers starting at + * ebp is followed and each return address printed; otherwise every word + * from stack upwards that __kernel_text_address() accepts is printed. + * Returns the final value of ebp. + */ +static inline unsigned long print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long ebp) +{ + unsigned long addr; + +#ifdef CONFIG_FRAME_POINTER + while (valid_stack_ptr(tinfo, (void *)ebp)) { + addr = *(unsigned long *)(ebp + 4); + printk("%08lx: [<%08lx>]", ebp + 4, addr); + print_symbol(" %s", addr); + printk("\n"); + ebp = *(unsigned long *)ebp; + } +#else + while (valid_stack_ptr(tinfo, stack)) { + addr = *stack; + if (__kernel_text_address(addr)) { + printk("%08lx: [<%08lx>]", (unsigned long) stack, addr); + print_symbol(" %s", addr); + printk("\n"); + } + stack++; + } +#endif + return ebp; +} + +void show_trace(struct task_struct* task, unsigned long * stack) +{ + unsigned long ebp; + struct thread_info *context; + + /* Turn this into BUG_ON if possible. 
*/ + if (!stack) { + stack = (unsigned long*) &stack; + printk("show_trace: got NULL stack, implicit assumption task == current"); + WARN_ON(1); + } + + if (!task) + task = current; + + if (task != current) { + ebp = (unsigned long) KSTK_EBP(task); + } else { + asm ("movl %%ebp, %0" : "=r" (ebp) : ); + } + + context = (struct thread_info *) + ((unsigned long)stack & (~(THREAD_SIZE - 1))); + print_context_stack(context, stack, ebp); + + printk("\n"); +} + diff --git a/arch/um/sys-i386/tls.c b/arch/um/sys-i386/tls.c new file mode 100644 index 0000000..c6c7131 --- /dev/null +++ b/arch/um/sys-i386/tls.c @@ -0,0 +1,396 @@ +/* + * Copyright (C) 2005 Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> + * Licensed under the GPL + */ + +#include "linux/percpu.h" +#include "linux/sched.h" +#include "asm/uaccess.h" +#include "os.h" +#include "skas.h" +#include "sysdep/tls.h" + +/* + * If needed we can detect when it's uninitialized. + * + * These are initialized in an initcall and unchanged thereafter. + */ +static int host_supports_tls = -1; +int host_gdt_entry_tls_min; + +int do_set_thread_area(struct user_desc *info) +{ + int ret; + u32 cpu; + + cpu = get_cpu(); + ret = os_set_thread_area(info, userspace_pid[cpu]); + put_cpu(); + + if (ret) + printk(KERN_ERR "PTRACE_SET_THREAD_AREA failed, err = %d, " + "index = %d\n", ret, info->entry_number); + + return ret; +} + +int do_get_thread_area(struct user_desc *info) +{ + int ret; + u32 cpu; + + cpu = get_cpu(); + ret = os_get_thread_area(info, userspace_pid[cpu]); + put_cpu(); + + if (ret) + printk(KERN_ERR "PTRACE_GET_THREAD_AREA failed, err = %d, " + "index = %d\n", ret, info->entry_number); + + return ret; +} + +/* + * sys_get_thread_area: get a yet unused TLS descriptor index. + * XXX: Consider leaving one free slot for glibc usage at first place. This must + * be done here (and by changing GDT_ENTRY_TLS_* macros) and nowhere else. 
+ * + * Also, this must be tested when compiling in SKAS mode with dynamic linking + * and running against NPTL. + */ +static int get_free_idx(struct task_struct* task) +{ + struct thread_struct *t = &task->thread; + int idx; + + if (!t->arch.tls_array) + return GDT_ENTRY_TLS_MIN; + + for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) + if (!t->arch.tls_array[idx].present) + return idx + GDT_ENTRY_TLS_MIN; + return -ESRCH; +} + +static inline void clear_user_desc(struct user_desc* info) +{ + /* Postcondition: LDT_empty(info) returns true. */ + memset(info, 0, sizeof(*info)); + + /* + * Check the LDT_empty or the i386 sys_get_thread_area code - we obtain + * indeed an empty user_desc. + */ + info->read_exec_only = 1; + info->seg_not_present = 1; +} + +#define O_FORCE 1 + +static int load_TLS(int flags, struct task_struct *to) +{ + int ret = 0; + int idx; + + for (idx = GDT_ENTRY_TLS_MIN; idx < GDT_ENTRY_TLS_MAX; idx++) { + struct uml_tls_struct* curr = + &to->thread.arch.tls_array[idx - GDT_ENTRY_TLS_MIN]; + + /* + * Actually, now if it wasn't flushed it gets cleared and + * flushed to the host, which will clear it. + */ + if (!curr->present) { + if (!curr->flushed) { + clear_user_desc(&curr->tls); + curr->tls.entry_number = idx; + } else { + WARN_ON(!LDT_empty(&curr->tls)); + continue; + } + } + + if (!(flags & O_FORCE) && curr->flushed) + continue; + + ret = do_set_thread_area(&curr->tls); + if (ret) + goto out; + + curr->flushed = 1; + } +out: + return ret; +} + +/* + * Verify if we need to do a flush for the new process, i.e. if there are any + * present desc's, only if they haven't been flushed. + */ +static inline int needs_TLS_update(struct task_struct *task) +{ + int i; + int ret = 0; + + for (i = GDT_ENTRY_TLS_MIN; i < GDT_ENTRY_TLS_MAX; i++) { + struct uml_tls_struct* curr = + &task->thread.arch.tls_array[i - GDT_ENTRY_TLS_MIN]; + + /* + * Can't test curr->present, we may need to clear a descriptor + * which had a value. 
+ */ + if (curr->flushed) + continue; + ret = 1; + break; + } + return ret; +} + +/* + * On a newly forked process, the TLS descriptors haven't yet been flushed. So + * we mark them as such and the first switch_to will do the job. + */ +void clear_flushed_tls(struct task_struct *task) +{ + int i; + + for (i = GDT_ENTRY_TLS_MIN; i < GDT_ENTRY_TLS_MAX; i++) { + struct uml_tls_struct* curr = + &task->thread.arch.tls_array[i - GDT_ENTRY_TLS_MIN]; + + /* + * Still correct to do this, if it wasn't present on the host it + * will remain as flushed as it was. + */ + if (!curr->present) + continue; + + curr->flushed = 0; + } +} + +/* + * In SKAS0 mode, currently, multiple guest threads sharing the same ->mm have a + * common host process. So this is needed in SKAS0 too. + * + * However, if each thread had a different host process (and this was discussed + * for SMP support) this won't be needed. + * + * And this will not need be used when (and if) we'll add support to the host + * SKAS patch. + */ + +int arch_switch_tls(struct task_struct *to) +{ + if (!host_supports_tls) + return 0; + + /* + * We have no need whatsoever to switch TLS for kernel threads; beyond + * that, that would also result in us calling os_set_thread_area with + * userspace_pid[cpu] == 0, which gives an error. 
+ */ + if (likely(to->mm)) + return load_TLS(O_FORCE, to); + + return 0; +} + +static int set_tls_entry(struct task_struct* task, struct user_desc *info, + int idx, int flushed) +{ + struct thread_struct *t = &task->thread; + + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].tls = *info; + t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].present = 1; + t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].flushed = flushed; + + return 0; +} + +int arch_copy_tls(struct task_struct *new) +{ + struct user_desc info; + int idx, ret = -EFAULT; + + if (copy_from_user(&info, + (void __user *) UPT_ESI(&new->thread.regs.regs), + sizeof(info))) + goto out; + + ret = -EINVAL; + if (LDT_empty(&info)) + goto out; + + idx = info.entry_number; + + ret = set_tls_entry(new, &info, idx, 0); +out: + return ret; +} + +/* XXX: use do_get_thread_area to read the host value? I'm not at all sure! */ +static int get_tls_entry(struct task_struct *task, struct user_desc *info, + int idx) +{ + struct thread_struct *t = &task->thread; + + if (!t->arch.tls_array) + goto clear; + + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + if (!t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].present) + goto clear; + + *info = t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].tls; + +out: + /* + * Temporary debugging check, to make sure that things have been + * flushed. This could be triggered if load_TLS() failed. + */ + if (unlikely(task == current && + !t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].flushed)) { + printk(KERN_ERR "get_tls_entry: task with pid %d got here " + "without flushed TLS.", current->pid); + } + + return 0; +clear: + /* + * When the TLS entry has not been set, the values read to user in the + * tls_array are 0 (because it's cleared at boot, see + * arch/i386/kernel/head.S:cpu_gdt_table). Emulate that. 
+ */ + clear_user_desc(info); + info->entry_number = idx; + goto out; +} + +int sys_set_thread_area(struct user_desc __user *user_desc) +{ + struct user_desc info; + int idx, ret; + + if (!host_supports_tls) + return -ENOSYS; + + if (copy_from_user(&info, user_desc, sizeof(info))) + return -EFAULT; + + idx = info.entry_number; + + if (idx == -1) { + idx = get_free_idx(current); + if (idx < 0) + return idx; + info.entry_number = idx; + /* Tell the user which slot we chose for him.*/ + if (put_user(idx, &user_desc->entry_number)) + return -EFAULT; + } + + ret = do_set_thread_area(&info); + if (ret) + return ret; + return set_tls_entry(current, &info, idx, 1); +} + +/* + * Perform set_thread_area on behalf of the traced child. + * Note: error handling is not done on the deferred load, and this differ from + * i386. However the only possible error are caused by bugs. + */ +int ptrace_set_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + struct user_desc info; + + if (!host_supports_tls) + return -EIO; + + if (copy_from_user(&info, user_desc, sizeof(info))) + return -EFAULT; + + return set_tls_entry(child, &info, idx, 0); +} + +int sys_get_thread_area(struct user_desc __user *user_desc) +{ + struct user_desc info; + int idx, ret; + + if (!host_supports_tls) + return -ENOSYS; + + if (get_user(idx, &user_desc->entry_number)) + return -EFAULT; + + ret = get_tls_entry(current, &info, idx); + if (ret < 0) + goto out; + + if (copy_to_user(user_desc, &info, sizeof(info))) + ret = -EFAULT; + +out: + return ret; +} + +/* + * Perform get_thread_area on behalf of the traced child. 
+ */ +int ptrace_get_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + struct user_desc info; + int ret; + + if (!host_supports_tls) + return -EIO; + + ret = get_tls_entry(child, &info, idx); + if (ret < 0) + goto out; + + if (copy_to_user(user_desc, &info, sizeof(info))) + ret = -EFAULT; +out: + return ret; +} + +/* + * This code is really i386-only, but it detects and logs x86_64 GDT indexes + * if a 32-bit UML is running on a 64-bit host. + */ +static int __init __setup_host_supports_tls(void) +{ + check_host_supports_tls(&host_supports_tls, &host_gdt_entry_tls_min); + if (host_supports_tls) { + printk(KERN_INFO "Host TLS support detected\n"); + printk(KERN_INFO "Detected host type: "); + switch (host_gdt_entry_tls_min) { + case GDT_ENTRY_TLS_MIN_I386: + printk(KERN_CONT "i386"); + break; + case GDT_ENTRY_TLS_MIN_X86_64: + printk(KERN_CONT "x86_64"); + break; + } + printk(KERN_CONT " (GDT indexes %d to %d)\n", + host_gdt_entry_tls_min, + host_gdt_entry_tls_min + GDT_ENTRY_TLS_ENTRIES); + } else + printk(KERN_ERR " Host TLS support NOT detected! 
" + "TLS support inside UML will not work\n"); + return 0; +} + +__initcall(__setup_host_supports_tls); diff --git a/arch/um/sys-i386/user-offsets.c b/arch/um/sys-i386/user-offsets.c new file mode 100644 index 0000000..5f883bf --- /dev/null +++ b/arch/um/sys-i386/user-offsets.c @@ -0,0 +1,53 @@ +#include <stdio.h> +#include <stddef.h> +#include <signal.h> +#include <sys/poll.h> +#include <sys/user.h> +#include <sys/mman.h> +#include <asm/ptrace.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define DEFINE_LONGS(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long))) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + +void foo(void) +{ + OFFSET(HOST_SC_TRAPNO, sigcontext, trapno); + OFFSET(HOST_SC_ERR, sigcontext, err); + OFFSET(HOST_SC_CR2, sigcontext, cr2); + + DEFINE_LONGS(HOST_FP_SIZE, sizeof(struct user_fpregs_struct)); + DEFINE_LONGS(HOST_FPX_SIZE, sizeof(struct user_fpxregs_struct)); + + DEFINE(HOST_IP, EIP); + DEFINE(HOST_SP, UESP); + DEFINE(HOST_EFLAGS, EFL); + DEFINE(HOST_EAX, EAX); + DEFINE(HOST_EBX, EBX); + DEFINE(HOST_ECX, ECX); + DEFINE(HOST_EDX, EDX); + DEFINE(HOST_ESI, ESI); + DEFINE(HOST_EDI, EDI); + DEFINE(HOST_EBP, EBP); + DEFINE(HOST_CS, CS); + DEFINE(HOST_SS, SS); + DEFINE(HOST_DS, DS); + DEFINE(HOST_FS, FS); + DEFINE(HOST_ES, ES); + DEFINE(HOST_GS, GS); + DEFINE(UM_FRAME_SIZE, sizeof(struct user_regs_struct)); + + /* XXX Duplicated between i386 and x86_64 */ + DEFINE(UM_POLLIN, POLLIN); + DEFINE(UM_POLLPRI, POLLPRI); + DEFINE(UM_POLLOUT, POLLOUT); + + DEFINE(UM_PROT_READ, PROT_READ); + DEFINE(UM_PROT_WRITE, PROT_WRITE); + DEFINE(UM_PROT_EXEC, PROT_EXEC); +} diff --git a/arch/um/sys-ia64/Makefile b/arch/um/sys-ia64/Makefile new file mode 100644 index 0000000..d02f4c2 --- /dev/null +++ b/arch/um/sys-ia64/Makefile @@ -0,0 +1,11 @@ +OBJ = built-in.o + +OBJS = + +all: $(OBJ) + +$(OBJ): $(OBJS) + rm -f $@ + $(LD) $(LINKFLAGS) --start-group $^ 
--end-group -o $@ + +clean-files := $(OBJS) link.ld diff --git a/arch/um/sys-ia64/sysdep/ptrace.h b/arch/um/sys-ia64/sysdep/ptrace.h new file mode 100644 index 0000000..42dd8fb --- /dev/null +++ b/arch/um/sys-ia64/sysdep/ptrace.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_IA64_PTRACE_H +#define __SYSDEP_IA64_PTRACE_H + +struct sys_pt_regs { + int foo; +}; + +#define EMPTY_REGS { 0 } + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/sys-ia64/sysdep/sigcontext.h b/arch/um/sys-ia64/sysdep/sigcontext.h new file mode 100644 index 0000000..f15fb25 --- /dev/null +++ b/arch/um/sys-ia64/sysdep/sigcontext.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_IA64_SIGCONTEXT_H +#define __SYSDEP_IA64_SIGCONTEXT_H + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/sys-ia64/sysdep/skas_ptrace.h b/arch/um/sys-ia64/sysdep/skas_ptrace.h new file mode 100644 index 0000000..25a38e7 --- /dev/null +++ b/arch/um/sys-ia64/sysdep/skas_ptrace.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_IA64_SKAS_PTRACE_H +#define __SYSDEP_IA64_SKAS_PTRACE_H + +struct ptrace_faultinfo { + int is_write; + unsigned long addr; +}; + +struct ptrace_ldt { + int func; + void *ptr; + unsigned long bytecount; +}; + +#define PTRACE_LDT 54 + +#endif diff --git a/arch/um/sys-ia64/sysdep/syscalls.h b/arch/um/sys-ia64/sysdep/syscalls.h new file mode 100644 index 0000000..4a1f46e --- /dev/null +++ b/arch/um/sys-ia64/sysdep/syscalls.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_IA64_SYSCALLS_H +#define __SYSDEP_IA64_SYSCALLS_H + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/sys-ppc/Makefile b/arch/um/sys-ppc/Makefile new file mode 100644 index 0000000..b8bc844 --- /dev/null +++ b/arch/um/sys-ppc/Makefile @@ -0,0 +1,65 @@ +OBJ = built-in.o + +.S.o: + $(CC) $(KBUILD_AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o + +OBJS = ptrace.o sigcontext.o checksum.o miscthings.o misc.o \ + ptrace_user.o sysrq.o + +EXTRA_AFLAGS := -DCONFIG_PPC32 -I. 
-I$(srctree)/arch/ppc/kernel + +all: $(OBJ) + +$(OBJ): $(OBJS) + rm -f $@ + $(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@ + +ptrace_user.o: ptrace_user.c + $(CC) -D__KERNEL__ $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< + +sigcontext.o: sigcontext.c + $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< + +checksum.S: + rm -f $@ + ln -s $(srctree)/arch/ppc/lib/$@ $@ + +mk_defs.c: + rm -f $@ + ln -s $(srctree)/arch/ppc/kernel/$@ $@ + +ppc_defs.head: + rm -f $@ + ln -s $(srctree)/arch/ppc/kernel/$@ $@ + +ppc_defs.h: mk_defs.c ppc_defs.head \ + $(srctree)/include/asm-ppc/mmu.h \ + $(srctree)/include/asm-ppc/processor.h \ + $(srctree)/include/asm-ppc/pgtable.h \ + $(srctree)/include/asm-ppc/ptrace.h +# $(CC) $(CFLAGS) -S mk_defs.c + cp ppc_defs.head ppc_defs.h +# for bk, this way we can write to the file even if it's not checked out + echo '#define THREAD 608' >> ppc_defs.h + echo '#define PT_REGS 8' >> ppc_defs.h + echo '#define CLONE_VM 256' >> ppc_defs.h +# chmod u+w ppc_defs.h +# grep '^#define' mk_defs.s >> ppc_defs.h +# rm mk_defs.s + +# the asm link is horrible, and breaks the other targets. This is also +# not going to work with parallel makes. 
+ +checksum.o: checksum.S + rm -f asm + ln -s $(srctree)/include/asm-ppc asm + $(CC) $(EXTRA_AFLAGS) $(KBUILD_AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o + rm -f asm + +misc.o: misc.S ppc_defs.h + rm -f asm + ln -s $(srctree)/include/asm-ppc asm + $(CC) $(EXTRA_AFLAGS) $(KBUILD_AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o + rm -f asm + +clean-files := $(OBJS) ppc_defs.h checksum.S mk_defs.c diff --git a/arch/um/sys-ppc/asm/archparam.h b/arch/um/sys-ppc/asm/archparam.h new file mode 100644 index 0000000..4269d8a --- /dev/null +++ b/arch/um/sys-ppc/asm/archparam.h @@ -0,0 +1,8 @@ +#ifndef __UM_ARCHPARAM_PPC_H +#define __UM_ARCHPARAM_PPC_H + +/********* Bits for asm-um/string.h **********/ + +#define __HAVE_ARCH_STRRCHR + +#endif diff --git a/arch/um/sys-ppc/asm/elf.h b/arch/um/sys-ppc/asm/elf.h new file mode 100644 index 0000000..af9463c --- /dev/null +++ b/arch/um/sys-ppc/asm/elf.h @@ -0,0 +1,53 @@ +#ifndef __UM_ELF_PPC_H +#define __UM_ELF_PPC_H + + +extern long elf_aux_hwcap; +#define ELF_HWCAP (elf_aux_hwcap) + +#define SET_PERSONALITY(ex) do ; while(0) + +#define ELF_EXEC_PAGESIZE 4096 + +#define elf_check_arch(x) (1) + +#ifdef CONFIG_64BIT +#define ELF_CLASS ELFCLASS64 +#else +#define ELF_CLASS ELFCLASS32 +#endif + +#define USE_ELF_CORE_DUMP + +#define R_386_NONE 0 +#define R_386_32 1 +#define R_386_PC32 2 +#define R_386_GOT32 3 +#define R_386_PLT32 4 +#define R_386_COPY 5 +#define R_386_GLOB_DAT 6 +#define R_386_JMP_SLOT 7 +#define R_386_RELATIVE 8 +#define R_386_GOTOFF 9 +#define R_386_GOTPC 10 +#define R_386_NUM 11 + +#define ELF_PLATFORM (0) + +#define ELF_ET_DYN_BASE (0x08000000) + +/* the following stolen from asm-ppc/elf.h */ +#define ELF_NGREG 48 /* includes nip, msr, lr, etc. 
*/ +#define ELF_NFPREG 33 /* includes fpscr */ +/* General registers */ +typedef unsigned long elf_greg_t; +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +/* Floating point registers */ +typedef double elf_fpreg_t; +typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; + +#define ELF_DATA ELFDATA2MSB +#define ELF_ARCH EM_PPC + +#endif diff --git a/arch/um/sys-ppc/asm/processor.h b/arch/um/sys-ppc/asm/processor.h new file mode 100644 index 0000000..9593231 --- /dev/null +++ b/arch/um/sys-ppc/asm/processor.h @@ -0,0 +1,15 @@ +#ifndef __UM_PROCESSOR_PPC_H +#define __UM_PROCESSOR_PPC_H + +#if defined(__ASSEMBLY__) + +#define CONFIG_PPC_MULTIPLATFORM +#include "arch/processor.h" + +#else + +#include "asm/processor-generic.h" + +#endif + +#endif diff --git a/arch/um/sys-ppc/misc.S b/arch/um/sys-ppc/misc.S new file mode 100644 index 0000000..1364b7d --- /dev/null +++ b/arch/um/sys-ppc/misc.S @@ -0,0 +1,111 @@ +/* + * This file contains miscellaneous low-level functions. + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras. + * + * A couple of functions stolen from arch/ppc/kernel/misc.S for UML + * by Chris Emerson. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <asm/processor.h> +#include "ppc_asm.h" + +#if defined(CONFIG_4xx) || defined(CONFIG_8xx) +#define CACHE_LINE_SIZE 16 +#define LG_CACHE_LINE_SIZE 4 +#define MAX_COPY_PREFETCH 1 +#else +#define CACHE_LINE_SIZE 32 +#define LG_CACHE_LINE_SIZE 5 +#define MAX_COPY_PREFETCH 4 +#endif /* CONFIG_4xx || CONFIG_8xx */ + + .text + +/* + * Clear a page using the dcbz instruction, which doesn't cause any + * memory traffic (except to write out any cache lines which get + * displaced). 
This only works on cacheable memory. + */ +_GLOBAL(clear_page) + li r0,4096/CACHE_LINE_SIZE + mtctr r0 +#ifdef CONFIG_8xx + li r4, 0 +1: stw r4, 0(r3) + stw r4, 4(r3) + stw r4, 8(r3) + stw r4, 12(r3) +#else +1: dcbz 0,r3 +#endif + addi r3,r3,CACHE_LINE_SIZE + bdnz 1b + blr + +/* + * Copy a whole page. We use the dcbz instruction on the destination + * to reduce memory traffic (it eliminates the unnecessary reads of + * the destination into cache). This requires that the destination + * is cacheable. + */ +#define COPY_16_BYTES \ + lwz r6,4(r4); \ + lwz r7,8(r4); \ + lwz r8,12(r4); \ + lwzu r9,16(r4); \ + stw r6,4(r3); \ + stw r7,8(r3); \ + stw r8,12(r3); \ + stwu r9,16(r3) + +_GLOBAL(copy_page) + addi r3,r3,-4 + addi r4,r4,-4 + li r5,4 + +#ifndef CONFIG_8xx +#if MAX_COPY_PREFETCH > 1 + li r0,MAX_COPY_PREFETCH + li r11,4 + mtctr r0 +11: dcbt r11,r4 + addi r11,r11,CACHE_LINE_SIZE + bdnz 11b +#else /* MAX_COPY_PREFETCH == 1 */ + dcbt r5,r4 + li r11,CACHE_LINE_SIZE+4 +#endif /* MAX_COPY_PREFETCH */ +#endif /* CONFIG_8xx */ + + li r0,4096/CACHE_LINE_SIZE + mtctr r0 +1: +#ifndef CONFIG_8xx + dcbt r11,r4 + dcbz r5,r3 +#endif + COPY_16_BYTES +#if CACHE_LINE_SIZE >= 32 + COPY_16_BYTES +#if CACHE_LINE_SIZE >= 64 + COPY_16_BYTES + COPY_16_BYTES +#if CACHE_LINE_SIZE >= 128 + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES +#endif +#endif +#endif + bdnz 1b + blr diff --git a/arch/um/sys-ppc/miscthings.c b/arch/um/sys-ppc/miscthings.c new file mode 100644 index 0000000..373061c --- /dev/null +++ b/arch/um/sys-ppc/miscthings.c @@ -0,0 +1,53 @@ +#include "linux/threads.h" +#include "linux/stddef.h" // for NULL +#include "linux/elf.h" // for AT_NULL + +/* The following function nicked from arch/ppc/kernel/process.c and + * adapted slightly */ +/* + * XXX ld.so expects the auxiliary table to start on + * a 16-byte boundary, so we have to find it and + * move it up. 
:-( + */ +void shove_aux_table(unsigned long sp) +{ + int argc; + char *p; + unsigned long e; + unsigned long aux_start, offset; + + argc = *(int *)sp; + sp += sizeof(int) + (argc + 1) * sizeof(char *); + /* skip over the environment pointers */ + do { + p = *(char **)sp; + sp += sizeof(char *); + } while (p != NULL); + aux_start = sp; + /* skip to the end of the auxiliary table */ + do { + e = *(unsigned long *)sp; + sp += 2 * sizeof(unsigned long); + } while (e != AT_NULL); + offset = ((aux_start + 15) & ~15) - aux_start; + if (offset != 0) { + do { + sp -= sizeof(unsigned long); + e = *(unsigned long *)sp; + *(unsigned long *)(sp + offset) = e; + } while (sp > aux_start); + } +} +/* END stuff taken from arch/ppc/kernel/process.c */ + + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/sys-ppc/ptrace.c b/arch/um/sys-ppc/ptrace.c new file mode 100644 index 0000000..8e71b47 --- /dev/null +++ b/arch/um/sys-ppc/ptrace.c @@ -0,0 +1,68 @@ +#include "linux/sched.h" +#include "asm/ptrace.h" + +int putreg(struct task_struct *child, unsigned long regno, + unsigned long value) +{ + child->thread.process_regs.regs[regno >> 2] = value; + return 0; +} + +int poke_user(struct task_struct *child, long addr, long data) +{ + if ((addr & 3) || addr < 0) + return -EIO; + + if (addr < MAX_REG_OFFSET) + return putreg(child, addr, data); + + else if((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))){ + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + if((addr == 4) || (addr == 5)) return -EIO; + child->thread.arch.debugregs[addr] = data; + return 0; + } + return -EIO; +} + 
+unsigned long getreg(struct task_struct *child, unsigned long regno) +{ + unsigned long retval = ~0UL; + + retval &= child->thread.process_regs.regs[regno >> 2]; + return retval; +} + +int peek_user(struct task_struct *child, long addr, long data) +{ + /* read the word at location addr in the USER area. */ + unsigned long tmp; + + if ((addr & 3) || addr < 0) + return -EIO; + + tmp = 0; /* Default return condition */ + if(addr < MAX_REG_OFFSET){ + tmp = getreg(child, addr); + } + else if((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))){ + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + tmp = child->thread.arch.debugregs[addr]; + } + return put_user(tmp, (unsigned long *) data); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/sys-ppc/ptrace_user.c b/arch/um/sys-ppc/ptrace_user.c new file mode 100644 index 0000000..ff0b9c0 --- /dev/null +++ b/arch/um/sys-ppc/ptrace_user.c @@ -0,0 +1,39 @@ +#include <errno.h> +#include <asm/ptrace.h> +#include "sysdep/ptrace.h" + +int ptrace_getregs(long pid, unsigned long *regs_out) +{ + int i; + for (i=0; i < sizeof(struct sys_pt_regs)/sizeof(PPC_REG); ++i) { + errno = 0; + regs_out[i] = ptrace(PTRACE_PEEKUSR, pid, i*4, 0); /* regs_out is a plain unsigned long array, not a struct */ + if (errno) { + return -errno; + } + } + return 0; +} + +int ptrace_setregs(long pid, unsigned long *regs_in) +{ + int i; + for (i=0; i < sizeof(struct sys_pt_regs)/sizeof(PPC_REG); ++i) { + if (i != 34 /* FIXME: PT_ORIG_R3 */ && i <= PT_MQ) { + if (ptrace(PTRACE_POKEUSR, pid, i*4, regs_in[i]) < 0) { + return -errno; + } + } + } + return 0; +} +/* + * Overrides for Emacs so that we 
follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/sys-ppc/shared/sysdep/ptrace.h b/arch/um/sys-ppc/shared/sysdep/ptrace.h new file mode 100644 index 0000000..df2397d --- /dev/null +++ b/arch/um/sys-ppc/shared/sysdep/ptrace.h @@ -0,0 +1,103 @@ +/* + * Licensed under the GPL + */ + +#ifndef __SYS_PTRACE_PPC_H +#define __SYS_PTRACE_PPC_H + +#include "linux/types.h" + +/* the following taken from <asm-ppc/ptrace.h> */ + +#ifdef CONFIG_PPC64 +#define PPC_REG unsigned long /*long*/ +#else +#define PPC_REG unsigned long +#endif +struct sys_pt_regs_s { + PPC_REG gpr[32]; + PPC_REG nip; + PPC_REG msr; + PPC_REG orig_gpr3; /* Used for restarting system calls */ + PPC_REG ctr; + PPC_REG link; + PPC_REG xer; + PPC_REG ccr; + PPC_REG mq; /* 601 only (not used at present) */ + /* Used on APUS to hold IPL value. */ + PPC_REG trap; /* Reason for being here */ + PPC_REG dar; /* Fault registers */ + PPC_REG dsisr; + PPC_REG result; /* Result of a system call */ +}; + +#define NUM_REGS (sizeof(struct sys_pt_regs_s) / sizeof(PPC_REG)) + +struct sys_pt_regs { + PPC_REG regs[sizeof(struct sys_pt_regs_s) / sizeof(PPC_REG)]; +}; + +#define UM_MAX_REG (PT_FPR0) +#define UM_MAX_REG_OFFSET (UM_MAX_REG * sizeof(PPC_REG)) + +#define EMPTY_REGS { { [ 0 ... 
NUM_REGS - 1] = 0 } } + +#define UM_REG(r, n) ((r)->regs[n]) + +#define UM_SYSCALL_RET(r) UM_REG(r, PT_R3) +#define UM_SP(r) UM_REG(r, PT_R1) +#define UM_IP(r) UM_REG(r, PT_NIP) +#define UM_ELF_ZERO(r) UM_REG(r, PT_FPSCR) +#define UM_SYSCALL_NR(r) UM_REG(r, PT_R0) +#define UM_SYSCALL_ARG1(r) UM_REG(r, PT_ORIG_R3) +#define UM_SYSCALL_ARG2(r) UM_REG(r, PT_R4) +#define UM_SYSCALL_ARG3(r) UM_REG(r, PT_R5) +#define UM_SYSCALL_ARG4(r) UM_REG(r, PT_R6) +#define UM_SYSCALL_ARG5(r) UM_REG(r, PT_R7) +#define UM_SYSCALL_ARG6(r) UM_REG(r, PT_R8) + +#define UM_SYSCALL_NR_OFFSET (PT_R0 * sizeof(PPC_REG)) +#define UM_SYSCALL_RET_OFFSET (PT_R3 * sizeof(PPC_REG)) +#define UM_SYSCALL_ARG1_OFFSET (PT_R3 * sizeof(PPC_REG)) +#define UM_SYSCALL_ARG2_OFFSET (PT_R4 * sizeof(PPC_REG)) +#define UM_SYSCALL_ARG3_OFFSET (PT_R5 * sizeof(PPC_REG)) +#define UM_SYSCALL_ARG4_OFFSET (PT_R6 * sizeof(PPC_REG)) +#define UM_SYSCALL_ARG5_OFFSET (PT_R7 * sizeof(PPC_REG)) +#define UM_SYSCALL_ARG6_OFFSET (PT_R8 * sizeof(PPC_REG)) +#define UM_SP_OFFSET (PT_R1 * sizeof(PPC_REG)) +#define UM_IP_OFFSET (PT_NIP * sizeof(PPC_REG)) +#define UM_ELF_ZERO_OFFSET (PT_R3 * sizeof(PPC_REG)) + +#define UM_SET_SYSCALL_RETURN(_regs, result) \ +do { \ + if (result < 0) { \ + (_regs)->regs[PT_CCR] |= 0x10000000; \ + UM_SYSCALL_RET((_regs)) = -result; \ + } else { \ + UM_SYSCALL_RET((_regs)) = result; \ + } \ +} while(0) + +extern void shove_aux_table(unsigned long sp); +#define UM_FIX_EXEC_STACK(sp) shove_aux_table(sp); + +/* These aren't actually defined. The undefs are just to make sure + * everyone's clear on the concept. + */ +#undef UML_HAVE_GETREGS +#undef UML_HAVE_GETFPREGS +#undef UML_HAVE_SETREGS +#undef UML_HAVE_SETFPREGS + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/sys-ppc/shared/sysdep/sigcontext.h b/arch/um/sys-ppc/shared/sysdep/sigcontext.h new file mode 100644 index 0000000..f20d965 --- /dev/null +++ b/arch/um/sys-ppc/shared/sysdep/sigcontext.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYS_SIGCONTEXT_PPC_H +#define __SYS_SIGCONTEXT_PPC_H + +#define DSISR_WRITE 0x02000000 + +#define SC_FAULT_ADDR(sc) ({ \ + struct sigcontext *_sc = (sc); \ + long retval = -1; \ + switch (_sc->regs->trap) { \ + case 0x300: \ + /* data exception */ \ + retval = _sc->regs->dar; \ + break; \ + case 0x400: \ + /* instruction exception */ \ + retval = _sc->regs->nip; \ + break; \ + default: \ + panic("SC_FAULT_ADDR: unhandled trap type\n"); \ + } \ + retval; \ + }) + +#define SC_FAULT_WRITE(sc) ({ \ + struct sigcontext *_sc = (sc); \ + long retval = -1; \ + switch (_sc->regs->trap) { \ + case 0x300: \ + /* data exception */ \ + retval = !!(_sc->regs->dsisr & DSISR_WRITE); \ + break; \ + case 0x400: \ + /* instruction exception: not a write */ \ + retval = 0; \ + break; \ + default: \ + panic("SC_FAULT_WRITE: unhandled trap type\n"); \ + } \ + retval; \ + }) + +#define SC_IP(sc) ((sc)->regs->nip) +#define SC_SP(sc) ((sc)->regs->gpr[1]) +#define SEGV_IS_FIXABLE(sc) (1) + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/sys-ppc/shared/sysdep/skas_ptrace.h b/arch/um/sys-ppc/shared/sysdep/skas_ptrace.h new file mode 100644 index 0000000..d9fbbac --- /dev/null +++ b/arch/um/sys-ppc/shared/sysdep/skas_ptrace.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_PPC_SKAS_PTRACE_H +#define __SYSDEP_PPC_SKAS_PTRACE_H + +struct ptrace_faultinfo { + int is_write; + unsigned long addr; +}; + +struct ptrace_ldt { + int func; + void *ptr; + unsigned long bytecount; +}; + +#define PTRACE_LDT 54 + +#endif diff --git a/arch/um/sys-ppc/shared/sysdep/syscalls.h b/arch/um/sys-ppc/shared/sysdep/syscalls.h new file mode 100644 index 0000000..679df35 --- /dev/null +++ b/arch/um/sys-ppc/shared/sysdep/syscalls.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +typedef long syscall_handler_t(unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, + unsigned long arg5, unsigned long arg6); + +#define EXECUTE_SYSCALL(syscall, regs) \ + (*sys_call_table[syscall])(UM_SYSCALL_ARG1(&regs), \ + UM_SYSCALL_ARG2(&regs), \ + UM_SYSCALL_ARG3(&regs), \ + UM_SYSCALL_ARG4(&regs), \ + UM_SYSCALL_ARG5(&regs), \ + UM_SYSCALL_ARG6(&regs)) + +extern syscall_handler_t sys_mincore; +extern syscall_handler_t sys_madvise; + +/* old_mmap needs the correct prototype since syscall_kern.c includes + * this file. 
+ */ +int old_mmap(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long offset); + +#define ARCH_SYSCALLS \ + [ __NR_modify_ldt ] = sys_ni_syscall, \ + [ __NR_pciconfig_read ] = sys_ni_syscall, \ + [ __NR_pciconfig_write ] = sys_ni_syscall, \ + [ __NR_pciconfig_iobase ] = sys_ni_syscall, \ + [ __NR_pivot_root ] = sys_ni_syscall, \ + [ __NR_multiplexer ] = sys_ni_syscall, \ + [ __NR_mmap ] = old_mmap, \ + [ __NR_madvise ] = sys_madvise, \ + [ __NR_mincore ] = sys_mincore, \ + [ __NR_iopl ] = (syscall_handler_t *) sys_ni_syscall, \ + [ __NR_utimes ] = (syscall_handler_t *) sys_utimes, \ + [ __NR_fadvise64 ] = (syscall_handler_t *) sys_fadvise64, + +#define LAST_ARCH_SYSCALL __NR_fadvise64 + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/sys-ppc/sigcontext.c b/arch/um/sys-ppc/sigcontext.c new file mode 100644 index 0000000..4bdc15c --- /dev/null +++ b/arch/um/sys-ppc/sigcontext.c @@ -0,0 +1,14 @@ +#include "asm/ptrace.h" +#include "asm/sigcontext.h" +#include "sysdep/ptrace.h" + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/sys-ppc/sysrq.c b/arch/um/sys-ppc/sysrq.c new file mode 100644 index 0000000..2f816f1 --- /dev/null +++ b/arch/um/sys-ppc/sysrq.c @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk) + * Licensed under the GPL + */ + +#include "linux/kernel.h" +#include "linux/smp.h" +#include "asm/ptrace.h" +#include "sysrq.h" + +void show_regs(struct pt_regs_subarch *regs) +{ + printk("\n"); + printk("show_regs(): insert regs here.\n"); +#if 0 + printk("\n"); + printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs, regs->eip, + smp_processor_id()); + if (regs->xcs & 3) + printk(" ESP: %04x:%08lx",0xffff & regs->xss, regs->esp); + printk(" EFLAGS: %08lx\n", regs->eflags); + printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", + regs->eax, regs->ebx, regs->ecx, regs->edx); + printk("ESI: %08lx EDI: %08lx EBP: %08lx", + regs->esi, regs->edi, regs->ebp); + printk(" DS: %04x ES: %04x\n", + 0xffff & regs->xds, 0xffff & regs->xes); +#endif + + show_trace(current, &regs->gpr[1]); +} diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile new file mode 100644 index 0000000..c8b4cce --- /dev/null +++ b/arch/um/sys-x86_64/Makefile @@ -0,0 +1,26 @@ +# +# Copyright 2003 PathScale, Inc. 
+# +# Licensed under the GPL +# + +obj-y = bug.o bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \ + setjmp.o signal.o stub.o stub_segv.o syscalls.o syscall_table.o \ + sysrq.o ksyms.o tls.o + +obj-$(CONFIG_MODULES) += um_module.o + +subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o +subarch-obj-$(CONFIG_MODULES) += kernel/module_64.o + +ldt-y = ../sys-i386/ldt.o + +USER_OBJS := ptrace_user.o + +USER_OBJS += user-offsets.s +extra-y += user-offsets.s + +UNPROFILE_OBJS := stub_segv.o +CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING) + +include arch/um/scripts/Makefile.rules diff --git a/arch/um/sys-x86_64/asm/archparam.h b/arch/um/sys-x86_64/asm/archparam.h new file mode 100644 index 0000000..270ed95 --- /dev/null +++ b/arch/um/sys-x86_64/asm/archparam.h @@ -0,0 +1,26 @@ +/* + * Copyright 2003 PathScale, Inc. + * + * Licensed under the GPL + */ + +#ifndef __UM_ARCHPARAM_X86_64_H +#define __UM_ARCHPARAM_X86_64_H + + +/* No user-accessible fixmap addresses, i.e. vsyscall */ +#define FIXADDR_USER_START 0 +#define FIXADDR_USER_END 0 + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/sys-x86_64/asm/elf.h b/arch/um/sys-x86_64/asm/elf.h new file mode 100644 index 0000000..6e8a919 --- /dev/null +++ b/arch/um/sys-x86_64/asm/elf.h @@ -0,0 +1,119 @@ +/* + * Copyright 2003 PathScale, Inc. 
+ * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * + * Licensed under the GPL + */ +#ifndef __UM_ELF_X86_64_H +#define __UM_ELF_X86_64_H + +#include <asm/user.h> +#include "skas.h" + +/* x86-64 relocation types, taken from asm-x86_64/elf.h */ +#define R_X86_64_NONE 0 /* No reloc */ +#define R_X86_64_64 1 /* Direct 64 bit */ +#define R_X86_64_PC32 2 /* PC relative 32 bit signed */ +#define R_X86_64_GOT32 3 /* 32 bit GOT entry */ +#define R_X86_64_PLT32 4 /* 32 bit PLT address */ +#define R_X86_64_COPY 5 /* Copy symbol at runtime */ +#define R_X86_64_GLOB_DAT 6 /* Create GOT entry */ +#define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */ +#define R_X86_64_RELATIVE 8 /* Adjust by program base */ +#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative + offset to GOT */ +#define R_X86_64_32 10 /* Direct 32 bit zero extended */ +#define R_X86_64_32S 11 /* Direct 32 bit sign extended */ +#define R_X86_64_16 12 /* Direct 16 bit zero extended */ +#define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */ +#define R_X86_64_8 14 /* Direct 8 bit sign extended */ +#define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */ + +#define R_X86_64_NUM 16 + +typedef unsigned long elf_greg_t; + +#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t)) +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef struct user_i387_struct elf_fpregset_t; + +/* + * This is used to ensure we don't load something for the wrong architecture. 
+ */ +#define elf_check_arch(x) \ + ((x)->e_machine == EM_X86_64) + +#define ELF_CLASS ELFCLASS64 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_X86_64 + +#define ELF_PLAT_INIT(regs, load_addr) do { \ + PT_REGS_RBX(regs) = 0; \ + PT_REGS_RCX(regs) = 0; \ + PT_REGS_RDX(regs) = 0; \ + PT_REGS_RSI(regs) = 0; \ + PT_REGS_RDI(regs) = 0; \ + PT_REGS_RBP(regs) = 0; \ + PT_REGS_RAX(regs) = 0; \ + PT_REGS_R8(regs) = 0; \ + PT_REGS_R9(regs) = 0; \ + PT_REGS_R10(regs) = 0; \ + PT_REGS_R11(regs) = 0; \ + PT_REGS_R12(regs) = 0; \ + PT_REGS_R13(regs) = 0; \ + PT_REGS_R14(regs) = 0; \ + PT_REGS_R15(regs) = 0; \ +} while (0) + +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ + (pr_reg)[0] = (regs)->regs.gp[0]; \ + (pr_reg)[1] = (regs)->regs.gp[1]; \ + (pr_reg)[2] = (regs)->regs.gp[2]; \ + (pr_reg)[3] = (regs)->regs.gp[3]; \ + (pr_reg)[4] = (regs)->regs.gp[4]; \ + (pr_reg)[5] = (regs)->regs.gp[5]; \ + (pr_reg)[6] = (regs)->regs.gp[6]; \ + (pr_reg)[7] = (regs)->regs.gp[7]; \ + (pr_reg)[8] = (regs)->regs.gp[8]; \ + (pr_reg)[9] = (regs)->regs.gp[9]; \ + (pr_reg)[10] = (regs)->regs.gp[10]; \ + (pr_reg)[11] = (regs)->regs.gp[11]; \ + (pr_reg)[12] = (regs)->regs.gp[12]; \ + (pr_reg)[13] = (regs)->regs.gp[13]; \ + (pr_reg)[14] = (regs)->regs.gp[14]; \ + (pr_reg)[15] = (regs)->regs.gp[15]; \ + (pr_reg)[16] = (regs)->regs.gp[16]; \ + (pr_reg)[17] = (regs)->regs.gp[17]; \ + (pr_reg)[18] = (regs)->regs.gp[18]; \ + (pr_reg)[19] = (regs)->regs.gp[19]; \ + (pr_reg)[20] = (regs)->regs.gp[20]; \ + (pr_reg)[21] = current->thread.arch.fs; \ + (pr_reg)[22] = 0; \ + (pr_reg)[23] = 0; \ + (pr_reg)[24] = 0; \ + (pr_reg)[25] = 0; \ + (pr_reg)[26] = 0; + +extern int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu); + +#define ELF_CORE_COPY_FPREGS(t, fpu) elf_core_copy_fpregs(t, fpu) + +#ifdef TIF_IA32 /* XXX */ +#error XXX, indeed + clear_thread_flag(TIF_IA32); +#endif + +#define USE_ELF_CORE_DUMP +#define ELF_EXEC_PAGESIZE 4096 + +#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) + +extern long 
elf_aux_hwcap; +#define ELF_HWCAP (elf_aux_hwcap) + +#define ELF_PLATFORM "x86_64" + +#define SET_PERSONALITY(ex) do ; while(0) + +#endif diff --git a/arch/um/sys-x86_64/asm/module.h b/arch/um/sys-x86_64/asm/module.h new file mode 100644 index 0000000..35b5491 --- /dev/null +++ b/arch/um/sys-x86_64/asm/module.h @@ -0,0 +1,30 @@ +/* + * Copyright 2003 PathScale, Inc. + * + * Licensed under the GPL + */ + +#ifndef __UM_MODULE_X86_64_H +#define __UM_MODULE_X86_64_H + +/* UML is simple */ +struct mod_arch_specific +{ +}; + +#define Elf_Shdr Elf64_Shdr +#define Elf_Sym Elf64_Sym +#define Elf_Ehdr Elf64_Ehdr + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/sys-x86_64/asm/processor.h b/arch/um/sys-x86_64/asm/processor.h new file mode 100644 index 0000000..875a26a --- /dev/null +++ b/arch/um/sys-x86_64/asm/processor.h @@ -0,0 +1,56 @@ +/* + * Copyright 2003 PathScale, Inc. + * + * Licensed under the GPL + */ + +#ifndef __UM_PROCESSOR_X86_64_H +#define __UM_PROCESSOR_X86_64_H + +/* include faultinfo structure */ +#include "sysdep/faultinfo.h" + +struct arch_thread { + unsigned long debugregs[8]; + int debugregs_seq; + unsigned long fs; + struct faultinfo faultinfo; +}; + +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ +static inline void rep_nop(void) +{ + __asm__ __volatile__("rep;nop": : :"memory"); +} + +#define cpu_relax() rep_nop() + +#define INIT_ARCH_THREAD { .debugregs = { [ 0 ... 
7 ] = 0 }, \ + .debugregs_seq = 0, \ + .fs = 0, \ + .faultinfo = { 0, 0, 0 } } + +static inline void arch_flush_thread(struct arch_thread *thread) +{ +} + +static inline void arch_copy_thread(struct arch_thread *from, + struct arch_thread *to) +{ + to->fs = from->fs; +} + +#include <asm/user.h> + +#define current_text_addr() \ + ({ void *pc; __asm__("movq $1f,%0\n1:":"=g" (pc)); pc; }) + +#define ARCH_IS_STACKGROW(address) \ + (address + 128 >= UPT_SP(&current->thread.regs.regs)) + +#define KSTK_EIP(tsk) KSTK_REG(tsk, RIP) +#define KSTK_ESP(tsk) KSTK_REG(tsk, RSP) + +#include "asm/processor-generic.h" + +#endif diff --git a/arch/um/sys-x86_64/asm/ptrace.h b/arch/um/sys-x86_64/asm/ptrace.h new file mode 100644 index 0000000..83d8c47 --- /dev/null +++ b/arch/um/sys-x86_64/asm/ptrace.h @@ -0,0 +1,72 @@ +/* + * Copyright 2003 PathScale, Inc. + * + * Licensed under the GPL + */ + +#ifndef __UM_PTRACE_X86_64_H +#define __UM_PTRACE_X86_64_H + +#include "linux/compiler.h" +#include "asm/errno.h" + +#define __FRAME_OFFSETS /* Needed to get the R* macros */ +#include "asm/ptrace-generic.h" + +#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64 + +#define PT_REGS_RBX(r) UPT_RBX(&(r)->regs) +#define PT_REGS_RCX(r) UPT_RCX(&(r)->regs) +#define PT_REGS_RDX(r) UPT_RDX(&(r)->regs) +#define PT_REGS_RSI(r) UPT_RSI(&(r)->regs) +#define PT_REGS_RDI(r) UPT_RDI(&(r)->regs) +#define PT_REGS_RBP(r) UPT_RBP(&(r)->regs) +#define PT_REGS_RAX(r) UPT_RAX(&(r)->regs) +#define PT_REGS_R8(r) UPT_R8(&(r)->regs) +#define PT_REGS_R9(r) UPT_R9(&(r)->regs) +#define PT_REGS_R10(r) UPT_R10(&(r)->regs) +#define PT_REGS_R11(r) UPT_R11(&(r)->regs) +#define PT_REGS_R12(r) UPT_R12(&(r)->regs) +#define PT_REGS_R13(r) UPT_R13(&(r)->regs) +#define PT_REGS_R14(r) UPT_R14(&(r)->regs) +#define PT_REGS_R15(r) UPT_R15(&(r)->regs) + +#define PT_REGS_FS(r) UPT_FS(&(r)->regs) +#define PT_REGS_GS(r) UPT_GS(&(r)->regs) +#define PT_REGS_DS(r) UPT_DS(&(r)->regs) +#define PT_REGS_ES(r) UPT_ES(&(r)->regs) +#define PT_REGS_SS(r) 
UPT_SS(&(r)->regs) +#define PT_REGS_CS(r) UPT_CS(&(r)->regs) + +#define PT_REGS_ORIG_RAX(r) UPT_ORIG_RAX(&(r)->regs) +#define PT_REGS_RIP(r) UPT_IP(&(r)->regs) +#define PT_REGS_RSP(r) UPT_SP(&(r)->regs) + +#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs) + +/* XXX */ +#define user_mode(r) UPT_IS_USER(&(r)->regs) +#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_RAX(r) +#define PT_REGS_SYSCALL_RET(r) PT_REGS_RAX(r) + +#define PT_FIX_EXEC_STACK(sp) do ; while(0) + +#define profile_pc(regs) PT_REGS_IP(regs) + +struct user_desc; + +static inline int ptrace_get_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + return -ENOSYS; +} + +static inline int ptrace_set_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + return -ENOSYS; +} + +extern long arch_prctl(struct task_struct *task, int code, + unsigned long __user *addr); +#endif diff --git a/arch/um/sys-x86_64/bug.c b/arch/um/sys-x86_64/bug.c new file mode 100644 index 0000000..e8034e3 --- /dev/null +++ b/arch/um/sys-x86_64/bug.c @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2006 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL V2 + */ + +#include <linux/uaccess.h> + +/* + * Mostly copied from i386/x86_64 - eliminated the eip < PAGE_OFFSET because + * that's not relevant in skas mode. + */ + +int is_valid_bugaddr(unsigned long eip) +{ + unsigned short ud2; + + if (probe_kernel_address((unsigned short __user *)eip, ud2)) + return 0; + + return ud2 == 0x0b0f; +} diff --git a/arch/um/sys-x86_64/bugs.c b/arch/um/sys-x86_64/bugs.c new file mode 100644 index 0000000..44e02ba --- /dev/null +++ b/arch/um/sys-x86_64/bugs.c @@ -0,0 +1,15 @@ +/* + * Copyright 2003 PathScale, Inc. 
+ * + * Licensed under the GPL + */ + +#include "sysdep/ptrace.h" + +void arch_check_bugs(void) +{ +} + +void arch_examine_signal(int sig, struct uml_pt_regs *regs) +{ +} diff --git a/arch/um/sys-x86_64/delay.c b/arch/um/sys-x86_64/delay.c new file mode 100644 index 0000000..dee5be6 --- /dev/null +++ b/arch/um/sys-x86_64/delay.c @@ -0,0 +1,30 @@ +/* + * Copyright 2003 PathScale, Inc. + * Copied from arch/x86_64 + * + * Licensed under the GPL + */ + +#include <linux/module.h> +#include <linux/delay.h> +#include <asm/processor.h> +#include <asm/param.h> + +void __delay(unsigned long loops) +{ + unsigned long i; + + for(i = 0; i < loops; i++) + cpu_relax(); +} + +void __udelay(unsigned long usecs) +{ + unsigned long i, n; + + n = (loops_per_jiffy * HZ * usecs) / MILLION; + for(i=0;i<n;i++) + cpu_relax(); +} + +EXPORT_SYMBOL(__udelay); diff --git a/arch/um/sys-x86_64/fault.c b/arch/um/sys-x86_64/fault.c new file mode 100644 index 0000000..ce85117 --- /dev/null +++ b/arch/um/sys-x86_64/fault.c @@ -0,0 +1,28 @@ +/* + * Copyright 2003 PathScale, Inc. + * + * Licensed under the GPL + */ + +#include "sysdep/ptrace.h" + +/* These two are from asm-um/uaccess.h and linux/module.h, check them. 
*/ +struct exception_table_entry +{ + unsigned long insn; + unsigned long fixup; +}; + +const struct exception_table_entry *search_exception_tables(unsigned long add); + +int arch_fixup(unsigned long address, struct uml_pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + fixup = search_exception_tables(address); + if (fixup != 0) { + UPT_IP(regs) = fixup->fixup; + return 1; + } + return 0; +} diff --git a/arch/um/sys-x86_64/ksyms.c b/arch/um/sys-x86_64/ksyms.c new file mode 100644 index 0000000..1db2fce --- /dev/null +++ b/arch/um/sys-x86_64/ksyms.c @@ -0,0 +1,11 @@ +#include <linux/module.h> +#include <asm/string.h> +#include <asm/checksum.h> + +/*XXX: we need them because they would be exported by x86_64 */ +#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 +EXPORT_SYMBOL(memcpy); +#else +EXPORT_SYMBOL(__memcpy); +#endif +EXPORT_SYMBOL(csum_partial); diff --git a/arch/um/sys-x86_64/mem.c b/arch/um/sys-x86_64/mem.c new file mode 100644 index 0000000..3f59a0a --- /dev/null +++ b/arch/um/sys-x86_64/mem.c @@ -0,0 +1,25 @@ +/* + * Copyright 2003 PathScale, Inc. + * + * Licensed under the GPL + */ + +#include "linux/mm.h" +#include "asm/page.h" +#include "asm/mman.h" + +unsigned long vm_stack_flags = __VM_STACK_FLAGS; +unsigned long vm_stack_flags32 = __VM_STACK_FLAGS; +unsigned long vm_data_default_flags = __VM_DATA_DEFAULT_FLAGS; +unsigned long vm_data_default_flags32 = __VM_DATA_DEFAULT_FLAGS; +unsigned long vm_force_exec32 = PROT_EXEC; + +/* Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/sys-x86_64/ptrace.c b/arch/um/sys-x86_64/ptrace.c new file mode 100644 index 0000000..f3458d7 --- /dev/null +++ b/arch/um/sys-x86_64/ptrace.c @@ -0,0 +1,195 @@ +/* + * Copyright 2003 PathScale, Inc. + * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * + * Licensed under the GPL + */ + +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/errno.h> +#define __FRAME_OFFSETS +#include <asm/ptrace.h> +#include <asm/uaccess.h> + +/* + * determines which flags the user has access to. + * 1 = access 0 = no access + */ +#define FLAG_MASK 0x44dd5UL + +int putreg(struct task_struct *child, int regno, unsigned long value) +{ + unsigned long tmp; + +#ifdef TIF_IA32 + /* + * Some code in the 64bit emulation may not be 64bit clean. + * Don't take any chances. + */ + if (test_tsk_thread_flag(child, TIF_IA32)) + value &= 0xffffffff; +#endif + switch (regno) { + case FS: + case GS: + case DS: + case ES: + case SS: + case CS: + if (value && (value & 3) != 3) + return -EIO; + value &= 0xffff; + break; + + case FS_BASE: + case GS_BASE: + if (!((value >> 48) == 0 || (value >> 48) == 0xffff)) + return -EIO; + break; + + case EFLAGS: + value &= FLAG_MASK; + tmp = PT_REGS_EFLAGS(&child->thread.regs) & ~FLAG_MASK; + value |= tmp; + break; + } + + PT_REGS_SET(&child->thread.regs, regno, value); + return 0; +} + +int poke_user(struct task_struct *child, long addr, long data) +{ + if ((addr & 3) || addr < 0) + return -EIO; + + if (addr < MAX_REG_OFFSET) + return putreg(child, addr, data); + else if ((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))) { + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + if ((addr == 4) || (addr == 5)) + return -EIO; + child->thread.arch.debugregs[addr] = data; + return 0; + } + return -EIO; +} + 
+unsigned long getreg(struct task_struct *child, int regno) +{ + unsigned long retval = ~0UL; + switch (regno) { + case FS: + case GS: + case DS: + case ES: + case SS: + case CS: + retval = 0xffff; + /* fall through */ + default: + retval &= PT_REG(&child->thread.regs, regno); +#ifdef TIF_IA32 + if (test_tsk_thread_flag(child, TIF_IA32)) + retval &= 0xffffffff; +#endif + } + return retval; +} + +int peek_user(struct task_struct *child, long addr, long data) +{ + /* read the word at location addr in the USER area. */ + unsigned long tmp; + + if ((addr & 3) || addr < 0) + return -EIO; + + tmp = 0; /* Default return condition */ + if (addr < MAX_REG_OFFSET) + tmp = getreg(child, addr); + else if ((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))) { + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + tmp = child->thread.arch.debugregs[addr]; + } + return put_user(tmp, (unsigned long *) data); +} + +/* XXX Mostly copied from sys-i386 */ +int is_syscall(unsigned long addr) +{ + unsigned short instr; + int n; + + n = copy_from_user(&instr, (void __user *) addr, sizeof(instr)); + if (n) { + /* + * access_process_vm() grants access to vsyscall and stub, + * while copy_from_user doesn't. Maybe access_process_vm is + * slow, but that doesn't matter, since it will be called only + * in case of singlestepping, if copy_from_user failed. 
+ */ + n = access_process_vm(current, addr, &instr, sizeof(instr), 0); + if (n != sizeof(instr)) { + printk("is_syscall : failed to read instruction from " + "0x%lx\n", addr); + return 1; + } + } + /* sysenter */ + return instr == 0x050f; +} + +int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) +{ + int err, n, cpu = ((struct thread_info *) child->stack)->cpu; + long fpregs[HOST_FP_SIZE]; + + BUG_ON(sizeof(*buf) != sizeof(fpregs)); + err = save_fp_registers(userspace_pid[cpu], fpregs); + if (err) + return err; + + n = copy_to_user(buf, fpregs, sizeof(fpregs)); + if (n > 0) + return -EFAULT; + + return n; +} + +int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child) +{ + int n, cpu = ((struct thread_info *) child->stack)->cpu; + long fpregs[HOST_FP_SIZE]; + + BUG_ON(sizeof(*buf) != sizeof(fpregs)); + n = copy_from_user(fpregs, buf, sizeof(fpregs)); + if (n > 0) + return -EFAULT; + + return restore_fp_registers(userspace_pid[cpu], fpregs); +} + +long subarch_ptrace(struct task_struct *child, long request, long addr, + long data) +{ + int ret = -EIO; + + switch (request) { + case PTRACE_GETFPXREGS: /* Get the child FPU state. */ + ret = get_fpregs((struct user_i387_struct __user *) data, + child); + break; + case PTRACE_SETFPXREGS: /* Set the child FPU state. */ + ret = set_fpregs((struct user_i387_struct __user *) data, + child); + break; + } + + return ret; +} diff --git a/arch/um/sys-x86_64/ptrace_user.c b/arch/um/sys-x86_64/ptrace_user.c new file mode 100644 index 0000000..c57a496 --- /dev/null +++ b/arch/um/sys-x86_64/ptrace_user.c @@ -0,0 +1,22 @@ +/* + * Copyright 2003 PathScale, Inc. 
+ * + * Licensed under the GPL + */ + +#include <errno.h> +#include "ptrace_user.h" + +int ptrace_getregs(long pid, unsigned long *regs_out) +{ + if (ptrace(PTRACE_GETREGS, pid, 0, regs_out) < 0) + return -errno; + return(0); +} + +int ptrace_setregs(long pid, unsigned long *regs_out) +{ + if (ptrace(PTRACE_SETREGS, pid, 0, regs_out) < 0) + return -errno; + return(0); +} diff --git a/arch/um/sys-x86_64/setjmp.S b/arch/um/sys-x86_64/setjmp.S new file mode 100644 index 0000000..45f547b --- /dev/null +++ b/arch/um/sys-x86_64/setjmp.S @@ -0,0 +1,54 @@ +# +# arch/x86_64/setjmp.S +# +# setjmp/longjmp for the x86-64 architecture +# + +# +# The jmp_buf is assumed to contain the following, in order: +# %rbx +# %rsp (post-return) +# %rbp +# %r12 +# %r13 +# %r14 +# %r15 +# <return address> +# + + .text + .align 4 + .globl setjmp + .type setjmp, @function +setjmp: + pop %rsi # Return address, and adjust the stack + xorl %eax,%eax # Return value + movq %rbx,(%rdi) + movq %rsp,8(%rdi) # Post-return %rsp! 
+ push %rsi # Make the call/return stack happy + movq %rbp,16(%rdi) + movq %r12,24(%rdi) + movq %r13,32(%rdi) + movq %r14,40(%rdi) + movq %r15,48(%rdi) + movq %rsi,56(%rdi) # Return address + ret + + .size setjmp,.-setjmp + + .text + .align 4 + .globl longjmp + .type longjmp, @function +longjmp: + movl %esi,%eax # Return value (int) + movq (%rdi),%rbx + movq 8(%rdi),%rsp + movq 16(%rdi),%rbp + movq 24(%rdi),%r12 + movq 32(%rdi),%r13 + movq 40(%rdi),%r14 + movq 48(%rdi),%r15 + jmp *56(%rdi) + + .size longjmp,.-longjmp diff --git a/arch/um/sys-x86_64/shared/sysdep/archsetjmp.h b/arch/um/sys-x86_64/shared/sysdep/archsetjmp.h new file mode 100644 index 0000000..2af8f12 --- /dev/null +++ b/arch/um/sys-x86_64/shared/sysdep/archsetjmp.h @@ -0,0 +1,24 @@ +/* + * arch/um/include/sysdep-x86_64/archsetjmp.h + */ + +#ifndef _KLIBC_ARCHSETJMP_H +#define _KLIBC_ARCHSETJMP_H + +struct __jmp_buf { + unsigned long __rbx; + unsigned long __rsp; + unsigned long __rbp; + unsigned long __r12; + unsigned long __r13; + unsigned long __r14; + unsigned long __r15; + unsigned long __rip; +}; + +typedef struct __jmp_buf jmp_buf[1]; + +#define JB_IP __rip +#define JB_SP __rsp + +#endif /* _SETJMP_H */ diff --git a/arch/um/sys-x86_64/shared/sysdep/barrier.h b/arch/um/sys-x86_64/shared/sysdep/barrier.h new file mode 100644 index 0000000..7b610be --- /dev/null +++ b/arch/um/sys-x86_64/shared/sysdep/barrier.h @@ -0,0 +1,7 @@ +#ifndef __SYSDEP_X86_64_BARRIER_H +#define __SYSDEP_X86_64_BARRIER_H + +/* Copied from include/asm-x86_64 for use by userspace. 
*/ +#define mb() asm volatile("mfence":::"memory") + +#endif diff --git a/arch/um/sys-x86_64/shared/sysdep/checksum.h b/arch/um/sys-x86_64/shared/sysdep/checksum.h new file mode 100644 index 0000000..a5be903 --- /dev/null +++ b/arch/um/sys-x86_64/shared/sysdep/checksum.h @@ -0,0 +1,144 @@ +/* + * Licensed under the GPL + */ + +#ifndef __UM_SYSDEP_CHECKSUM_H +#define __UM_SYSDEP_CHECKSUM_H + +#include "linux/string.h" +#include "linux/in6.h" +#include "asm/uaccess.h" + +extern __wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * Note: when you get a NULL pointer exception here this means someone + * passed in an incorrect kernel address to one of these functions. + * + * If you use these functions directly please don't forget the + * access_ok(). + */ + +static __inline__ +__wsum csum_partial_copy_nocheck(const void *src, void *dst, + int len, __wsum sum) +{ + memcpy(dst, src, len); + return(csum_partial(dst, len, sum)); +} + +static __inline__ +__wsum csum_partial_copy_from_user(const void __user *src, + void *dst, int len, __wsum sum, + int *err_ptr) +{ + if (copy_from_user(dst, src, len)) { + *err_ptr = -EFAULT; + return (__force __wsum)-1; + } + return csum_partial(dst, len, sum); +} + +/** + * csum_fold - Fold and invert a 32bit checksum. + * sum: 32bit unfolded sum + * + * Fold a 32bit running checksum to 16bit and invert it. This is usually + * the last step before putting a checksum into a packet. + * Make sure not to mix with 64bit checksums. + */ +static inline __sum16 csum_fold(__wsum sum) +{ + __asm__( + " addl %1,%0\n" + " adcl $0xffff,%0" + : "=r" (sum) + : "r" ((__force u32)sum << 16), + "0" ((__force u32)sum & 0xffff0000) + ); + return (__force __sum16)(~(__force u32)sum >> 16); +} + +/** + * csum_tcpup_nofold - Compute an IPv4 pseudo header checksum. 
+ * @saddr: source address + * @daddr: destination address + * @len: length of packet + * @proto: ip protocol of packet + * @sum: initial sum to be added in (32bit unfolded) + * + * Returns the pseudo header checksum the input data. Result is + * 32bit unfolded. + */ +static inline __wsum +csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum) +{ + asm(" addl %1, %0\n" + " adcl %2, %0\n" + " adcl %3, %0\n" + " adcl $0, %0\n" + : "=r" (sum) + : "g" (daddr), "g" (saddr), "g" ((len + proto) << 8), "0" (sum)); + return sum; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); +} + +/** + * ip_fast_csum - Compute the IPv4 header checksum efficiently. + * iph: ipv4 header + * ihl: length of header / 4 + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned int sum; + + asm( " movl (%1), %0\n" + " subl $4, %2\n" + " jbe 2f\n" + " addl 4(%1), %0\n" + " adcl 8(%1), %0\n" + " adcl 12(%1), %0\n" + "1: adcl 16(%1), %0\n" + " lea 4(%1), %1\n" + " decl %2\n" + " jne 1b\n" + " adcl $0, %0\n" + " movl %0, %2\n" + " shrl $16, %0\n" + " addw %w2, %w0\n" + " adcl $0, %0\n" + " notl %0\n" + "2:" + /* Since the input registers which are loaded with iph and ipl + are modified, we must also specify them as outputs, or gcc + will assume they contain their original values. 
*/ + : "=r" (sum), "=r" (iph), "=r" (ihl) + : "1" (iph), "2" (ihl) + : "memory"); + return (__force __sum16)sum; +} + +static inline unsigned add32_with_carry(unsigned a, unsigned b) +{ + asm("addl %2,%0\n\t" + "adcl $0,%0" + : "=r" (a) + : "0" (a), "r" (b)); + return a; +} + +extern __sum16 ip_compute_csum(const void *buff, int len); + +#endif diff --git a/arch/um/sys-x86_64/shared/sysdep/faultinfo.h b/arch/um/sys-x86_64/shared/sysdep/faultinfo.h new file mode 100644 index 0000000..cb917b0 --- /dev/null +++ b/arch/um/sys-x86_64/shared/sysdep/faultinfo.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2004 Fujitsu Siemens Computers GmbH + * Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com> + * Licensed under the GPL + */ + +#ifndef __FAULTINFO_X86_64_H +#define __FAULTINFO_X86_64_H + +/* this structure contains the full arch-specific faultinfo + * from the traps. + * On i386, ptrace_faultinfo unfortunately doesn't provide + * all the info, since trap_no is missing. + * All common elements are defined at the same position in + * both structures, thus making it easy to copy the + * contents without knowledge about the structure elements. 
+ */ +struct faultinfo { + int error_code; /* in ptrace_faultinfo misleadingly called is_write */ + unsigned long cr2; /* in ptrace_faultinfo called addr */ + int trap_no; /* missing in ptrace_faultinfo */ +}; + +#define FAULT_WRITE(fi) ((fi).error_code & 2) +#define FAULT_ADDRESS(fi) ((fi).cr2) + +#define PTRACE_FULL_FAULTINFO 1 + +#endif diff --git a/arch/um/sys-x86_64/shared/sysdep/host_ldt.h b/arch/um/sys-x86_64/shared/sysdep/host_ldt.h new file mode 100644 index 0000000..e8b1be1 --- /dev/null +++ b/arch/um/sys-x86_64/shared/sysdep/host_ldt.h @@ -0,0 +1,38 @@ +#ifndef __ASM_HOST_LDT_X86_64_H +#define __ASM_HOST_LDT_X86_64_H + +#include <asm/ldt.h> + +/* + * macros stolen from include/asm-x86_64/desc.h + */ +#define LDT_entry_a(info) \ + ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) + +/* Don't allow setting of the lm bit. It is useless anyways because + * 64bit system calls require __USER_CS. */ +#define LDT_entry_b(info) \ + (((info)->base_addr & 0xff000000) | \ + (((info)->base_addr & 0x00ff0000) >> 16) | \ + ((info)->limit & 0xf0000) | \ + (((info)->read_exec_only ^ 1) << 9) | \ + ((info)->contents << 10) | \ + (((info)->seg_not_present ^ 1) << 15) | \ + ((info)->seg_32bit << 22) | \ + ((info)->limit_in_pages << 23) | \ + ((info)->useable << 20) | \ + /* ((info)->lm << 21) | */ \ + 0x7000) + +#define LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->useable == 0 && \ + (info)->lm == 0) + +#endif diff --git a/arch/um/sys-x86_64/shared/sysdep/kernel-offsets.h b/arch/um/sys-x86_64/shared/sysdep/kernel-offsets.h new file mode 100644 index 0000000..a307237 --- /dev/null +++ b/arch/um/sys-x86_64/shared/sysdep/kernel-offsets.h @@ -0,0 +1,23 @@ +#include <linux/stddef.h> +#include <linux/sched.h> +#include <linux/time.h> +#include <linux/elf.h> 
+#include <linux/crypto.h> +#include <asm/page.h> +#include <asm/mman.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define DEFINE_STR1(x) #x +#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " DEFINE_STR1(val) " " #val: : ) + +#define BLANK() asm volatile("\n->" : : ) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + +void foo(void) +{ +#include <common-offsets.h> +} diff --git a/arch/um/sys-x86_64/shared/sysdep/ptrace.h b/arch/um/sys-x86_64/shared/sysdep/ptrace.h new file mode 100644 index 0000000..fdba545 --- /dev/null +++ b/arch/um/sys-x86_64/shared/sysdep/ptrace.h @@ -0,0 +1,239 @@ +/* + * Copyright 2003 PathScale, Inc. + * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * + * Licensed under the GPL + */ + +#ifndef __SYSDEP_X86_64_PTRACE_H +#define __SYSDEP_X86_64_PTRACE_H + +#include "user_constants.h" +#include "sysdep/faultinfo.h" + +#define MAX_REG_OFFSET (UM_FRAME_SIZE) +#define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long)) + +#include "skas_ptregs.h" + +#define REGS_IP(r) ((r)[HOST_IP]) +#define REGS_SP(r) ((r)[HOST_SP]) + +#define REGS_RBX(r) ((r)[HOST_RBX]) +#define REGS_RCX(r) ((r)[HOST_RCX]) +#define REGS_RDX(r) ((r)[HOST_RDX]) +#define REGS_RSI(r) ((r)[HOST_RSI]) +#define REGS_RDI(r) ((r)[HOST_RDI]) +#define REGS_RBP(r) ((r)[HOST_RBP]) +#define REGS_RAX(r) ((r)[HOST_RAX]) +#define REGS_R8(r) ((r)[HOST_R8]) +#define REGS_R9(r) ((r)[HOST_R9]) +#define REGS_R10(r) ((r)[HOST_R10]) +#define REGS_R11(r) ((r)[HOST_R11]) +#define REGS_R12(r) ((r)[HOST_R12]) +#define REGS_R13(r) ((r)[HOST_R13]) +#define REGS_R14(r) ((r)[HOST_R14]) +#define REGS_R15(r) ((r)[HOST_R15]) +#define REGS_CS(r) ((r)[HOST_CS]) +#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS]) +#define REGS_SS(r) ((r)[HOST_SS]) + +#define HOST_FS_BASE 21 +#define HOST_GS_BASE 22 +#define HOST_DS 23 +#define HOST_ES 24 +#define HOST_FS 25 +#define HOST_GS 26 + +/* Also defined in 
asm/ptrace-x86_64.h, but not in libc headers. So, these + * are already defined for kernel code, but not for userspace code. + */ +#ifndef FS_BASE +/* These aren't defined in ptrace.h, but exist in struct user_regs_struct, + * which is what x86_64 ptrace actually uses. + */ +#define FS_BASE (HOST_FS_BASE * sizeof(long)) +#define GS_BASE (HOST_GS_BASE * sizeof(long)) +#define DS (HOST_DS * sizeof(long)) +#define ES (HOST_ES * sizeof(long)) +#define FS (HOST_FS * sizeof(long)) +#define GS (HOST_GS * sizeof(long)) +#endif + +#define REGS_FS_BASE(r) ((r)[HOST_FS_BASE]) +#define REGS_GS_BASE(r) ((r)[HOST_GS_BASE]) +#define REGS_DS(r) ((r)[HOST_DS]) +#define REGS_ES(r) ((r)[HOST_ES]) +#define REGS_FS(r) ((r)[HOST_FS]) +#define REGS_GS(r) ((r)[HOST_GS]) + +#define REGS_ORIG_RAX(r) ((r)[HOST_ORIG_RAX]) + +#define REGS_SET_SYSCALL_RETURN(r, res) REGS_RAX(r) = (res) + +#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r)) + +#define REGS_SEGV_IS_FIXABLE(r) SEGV_IS_FIXABLE((r)->trap_type) + +#define REGS_FAULT_ADDR(r) ((r)->fault_addr) + +#define REGS_FAULT_WRITE(r) FAULT_WRITE((r)->fault_type) + +#define REGS_TRAP(r) ((r)->trap_type) + +#define REGS_ERR(r) ((r)->fault_type) + +struct uml_pt_regs { + unsigned long gp[MAX_REG_NR]; + struct faultinfo faultinfo; + long syscall; + int is_user; +}; + +#define EMPTY_UML_PT_REGS { } + +#define UPT_RBX(r) REGS_RBX((r)->gp) +#define UPT_RCX(r) REGS_RCX((r)->gp) +#define UPT_RDX(r) REGS_RDX((r)->gp) +#define UPT_RSI(r) REGS_RSI((r)->gp) +#define UPT_RDI(r) REGS_RDI((r)->gp) +#define UPT_RBP(r) REGS_RBP((r)->gp) +#define UPT_RAX(r) REGS_RAX((r)->gp) +#define UPT_R8(r) REGS_R8((r)->gp) +#define UPT_R9(r) REGS_R9((r)->gp) +#define UPT_R10(r) REGS_R10((r)->gp) +#define UPT_R11(r) REGS_R11((r)->gp) +#define UPT_R12(r) REGS_R12((r)->gp) +#define UPT_R13(r) REGS_R13((r)->gp) +#define UPT_R14(r) REGS_R14((r)->gp) +#define UPT_R15(r) REGS_R15((r)->gp) +#define UPT_CS(r) REGS_CS((r)->gp) +#define UPT_FS_BASE(r) REGS_FS_BASE((r)->gp) 
+#define UPT_FS(r) REGS_FS((r)->gp) +#define UPT_GS_BASE(r) REGS_GS_BASE((r)->gp) +#define UPT_GS(r) REGS_GS((r)->gp) +#define UPT_DS(r) REGS_DS((r)->gp) +#define UPT_ES(r) REGS_ES((r)->gp) +#define UPT_CS(r) REGS_CS((r)->gp) +#define UPT_SS(r) REGS_SS((r)->gp) +#define UPT_ORIG_RAX(r) REGS_ORIG_RAX((r)->gp) + +#define UPT_IP(r) REGS_IP((r)->gp) +#define UPT_SP(r) REGS_SP((r)->gp) + +#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp) +#define UPT_SYSCALL_NR(r) ((r)->syscall) +#define UPT_SYSCALL_RET(r) UPT_RAX(r) + +extern int user_context(unsigned long sp); + +#define UPT_IS_USER(r) ((r)->is_user) + +#define UPT_SYSCALL_ARG1(r) UPT_RDI(r) +#define UPT_SYSCALL_ARG2(r) UPT_RSI(r) +#define UPT_SYSCALL_ARG3(r) UPT_RDX(r) +#define UPT_SYSCALL_ARG4(r) UPT_R10(r) +#define UPT_SYSCALL_ARG5(r) UPT_R8(r) +#define UPT_SYSCALL_ARG6(r) UPT_R9(r) + +struct syscall_args { + unsigned long args[6]; +}; + +#define SYSCALL_ARGS(r) ((struct syscall_args) \ + { .args = { UPT_SYSCALL_ARG1(r), \ + UPT_SYSCALL_ARG2(r), \ + UPT_SYSCALL_ARG3(r), \ + UPT_SYSCALL_ARG4(r), \ + UPT_SYSCALL_ARG5(r), \ + UPT_SYSCALL_ARG6(r) } } ) + +#define UPT_REG(regs, reg) \ + ({ unsigned long val; \ + switch(reg){ \ + case R8: val = UPT_R8(regs); break; \ + case R9: val = UPT_R9(regs); break; \ + case R10: val = UPT_R10(regs); break; \ + case R11: val = UPT_R11(regs); break; \ + case R12: val = UPT_R12(regs); break; \ + case R13: val = UPT_R13(regs); break; \ + case R14: val = UPT_R14(regs); break; \ + case R15: val = UPT_R15(regs); break; \ + case RIP: val = UPT_IP(regs); break; \ + case RSP: val = UPT_SP(regs); break; \ + case RAX: val = UPT_RAX(regs); break; \ + case RBX: val = UPT_RBX(regs); break; \ + case RCX: val = UPT_RCX(regs); break; \ + case RDX: val = UPT_RDX(regs); break; \ + case RSI: val = UPT_RSI(regs); break; \ + case RDI: val = UPT_RDI(regs); break; \ + case RBP: val = UPT_RBP(regs); break; \ + case ORIG_RAX: val = UPT_ORIG_RAX(regs); break; \ + case CS: val = UPT_CS(regs); break; \ + case SS: val 
= UPT_SS(regs); break; \ + case FS_BASE: val = UPT_FS_BASE(regs); break; \ + case GS_BASE: val = UPT_GS_BASE(regs); break; \ + case DS: val = UPT_DS(regs); break; \ + case ES: val = UPT_ES(regs); break; \ + case FS : val = UPT_FS (regs); break; \ + case GS: val = UPT_GS(regs); break; \ + case EFLAGS: val = UPT_EFLAGS(regs); break; \ + default : \ + panic("Bad register in UPT_REG : %d\n", reg); \ + val = -1; \ + } \ + val; \ + }) + + +#define UPT_SET(regs, reg, val) \ + ({ unsigned long __upt_val = val; \ + switch(reg){ \ + case R8: UPT_R8(regs) = __upt_val; break; \ + case R9: UPT_R9(regs) = __upt_val; break; \ + case R10: UPT_R10(regs) = __upt_val; break; \ + case R11: UPT_R11(regs) = __upt_val; break; \ + case R12: UPT_R12(regs) = __upt_val; break; \ + case R13: UPT_R13(regs) = __upt_val; break; \ + case R14: UPT_R14(regs) = __upt_val; break; \ + case R15: UPT_R15(regs) = __upt_val; break; \ + case RIP: UPT_IP(regs) = __upt_val; break; \ + case RSP: UPT_SP(regs) = __upt_val; break; \ + case RAX: UPT_RAX(regs) = __upt_val; break; \ + case RBX: UPT_RBX(regs) = __upt_val; break; \ + case RCX: UPT_RCX(regs) = __upt_val; break; \ + case RDX: UPT_RDX(regs) = __upt_val; break; \ + case RSI: UPT_RSI(regs) = __upt_val; break; \ + case RDI: UPT_RDI(regs) = __upt_val; break; \ + case RBP: UPT_RBP(regs) = __upt_val; break; \ + case ORIG_RAX: UPT_ORIG_RAX(regs) = __upt_val; break; \ + case CS: UPT_CS(regs) = __upt_val; break; \ + case SS: UPT_SS(regs) = __upt_val; break; \ + case FS_BASE: UPT_FS_BASE(regs) = __upt_val; break; \ + case GS_BASE: UPT_GS_BASE(regs) = __upt_val; break; \ + case DS: UPT_DS(regs) = __upt_val; break; \ + case ES: UPT_ES(regs) = __upt_val; break; \ + case FS: UPT_FS(regs) = __upt_val; break; \ + case GS: UPT_GS(regs) = __upt_val; break; \ + case EFLAGS: UPT_EFLAGS(regs) = __upt_val; break; \ + default : \ + panic("Bad register in UPT_SET : %d\n", reg); \ + break; \ + } \ + __upt_val; \ + }) + +#define UPT_SET_SYSCALL_RETURN(r, res) \ + 
REGS_SET_SYSCALL_RETURN((r)->regs, (res)) + +#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp) + +#define UPT_SEGV_IS_FIXABLE(r) REGS_SEGV_IS_FIXABLE(&r->skas) + +#define UPT_FAULTINFO(r) (&(r)->faultinfo) + +static inline void arch_init_registers(int pid) +{ +} + +#endif diff --git a/arch/um/sys-x86_64/shared/sysdep/ptrace_user.h b/arch/um/sys-x86_64/shared/sysdep/ptrace_user.h new file mode 100644 index 0000000..4dbccdb --- /dev/null +++ b/arch/um/sys-x86_64/shared/sysdep/ptrace_user.h @@ -0,0 +1,77 @@ +/* + * Copyright 2003 PathScale, Inc. + * + * Licensed under the GPL + */ + +#ifndef __SYSDEP_X86_64_PTRACE_USER_H__ +#define __SYSDEP_X86_64_PTRACE_USER_H__ + +#define __FRAME_OFFSETS +#include <sys/ptrace.h> +#include <linux/ptrace.h> +#include <asm/ptrace.h> +#undef __FRAME_OFFSETS +#include "user_constants.h" + +#define PT_INDEX(off) ((off) / sizeof(unsigned long)) + +#define PT_SYSCALL_NR(regs) ((regs)[PT_INDEX(ORIG_RAX)]) +#define PT_SYSCALL_NR_OFFSET (ORIG_RAX) + +#define PT_SYSCALL_ARG1(regs) (((unsigned long *) (regs))[PT_INDEX(RDI)]) +#define PT_SYSCALL_ARG1_OFFSET (RDI) + +#define PT_SYSCALL_ARG2(regs) (((unsigned long *) (regs))[PT_INDEX(RSI)]) +#define PT_SYSCALL_ARG2_OFFSET (RSI) + +#define PT_SYSCALL_ARG3(regs) (((unsigned long *) (regs))[PT_INDEX(RDX)]) +#define PT_SYSCALL_ARG3_OFFSET (RDX) + +#define PT_SYSCALL_ARG4(regs) (((unsigned long *) (regs))[PT_INDEX(RCX)]) +#define PT_SYSCALL_ARG4_OFFSET (RCX) + +#define PT_SYSCALL_ARG5(regs) (((unsigned long *) (regs))[PT_INDEX(R8)]) +#define PT_SYSCALL_ARG5_OFFSET (R8) + +#define PT_SYSCALL_ARG6(regs) (((unsigned long *) (regs))[PT_INDEX(R9)]) +#define PT_SYSCALL_ARG6_OFFSET (R9) + +#define PT_SYSCALL_RET_OFFSET (RAX) + +#define PT_IP_OFFSET (RIP) +#define PT_IP(regs) ((regs)[PT_INDEX(RIP)]) + +#define PT_SP_OFFSET (RSP) +#define PT_SP(regs) ((regs)[PT_INDEX(RSP)]) + +#define PT_ORIG_RAX_OFFSET (ORIG_RAX) +#define PT_ORIG_RAX(regs) ((regs)[PT_INDEX(ORIG_RAX)]) + +/* + * x86_64 FC3 doesn't 
define this in /usr/include/linux/ptrace.h even though + * it's defined in the kernel's include/linux/ptrace.h. Additionally, use the + * 2.4 name and value for 2.4 host compatibility. + */ +#ifndef PTRACE_OLDSETOPTIONS +#define PTRACE_OLDSETOPTIONS 21 +#endif + +/* + * These are before the system call, so the system call number is RAX + * rather than ORIG_RAX, and arg4 is R10 rather than RCX + */ +#define REGS_SYSCALL_NR PT_INDEX(RAX) +#define REGS_SYSCALL_ARG1 PT_INDEX(RDI) +#define REGS_SYSCALL_ARG2 PT_INDEX(RSI) +#define REGS_SYSCALL_ARG3 PT_INDEX(RDX) +#define REGS_SYSCALL_ARG4 PT_INDEX(R10) +#define REGS_SYSCALL_ARG5 PT_INDEX(R8) +#define REGS_SYSCALL_ARG6 PT_INDEX(R9) + +#define REGS_IP_INDEX PT_INDEX(RIP) +#define REGS_SP_INDEX PT_INDEX(RSP) + +#define FP_SIZE (HOST_FP_SIZE) + +#endif diff --git a/arch/um/sys-x86_64/shared/sysdep/sc.h b/arch/um/sys-x86_64/shared/sysdep/sc.h new file mode 100644 index 0000000..8aee45b --- /dev/null +++ b/arch/um/sys-x86_64/shared/sysdep/sc.h @@ -0,0 +1,45 @@ +#ifndef __SYSDEP_X86_64_SC_H +#define __SYSDEP_X86_64_SC_H + +/* Copyright (C) 2003 - 2004 PathScale, Inc + * Released under the GPL + */ + +#include <user_constants.h> + +#define SC_OFFSET(sc, field) \ + *((unsigned long *) &(((char *) (sc))[HOST_##field])) + +#define SC_RBX(sc) SC_OFFSET(sc, SC_RBX) +#define SC_RCX(sc) SC_OFFSET(sc, SC_RCX) +#define SC_RDX(sc) SC_OFFSET(sc, SC_RDX) +#define SC_RSI(sc) SC_OFFSET(sc, SC_RSI) +#define SC_RDI(sc) SC_OFFSET(sc, SC_RDI) +#define SC_RBP(sc) SC_OFFSET(sc, SC_RBP) +#define SC_RAX(sc) SC_OFFSET(sc, SC_RAX) +#define SC_R8(sc) SC_OFFSET(sc, SC_R8) +#define SC_R9(sc) SC_OFFSET(sc, SC_R9) +#define SC_R10(sc) SC_OFFSET(sc, SC_R10) +#define SC_R11(sc) SC_OFFSET(sc, SC_R11) +#define SC_R12(sc) SC_OFFSET(sc, SC_R12) +#define SC_R13(sc) SC_OFFSET(sc, SC_R13) +#define SC_R14(sc) SC_OFFSET(sc, SC_R14) +#define SC_R15(sc) SC_OFFSET(sc, SC_R15) +#define SC_IP(sc) SC_OFFSET(sc, SC_IP) +#define SC_SP(sc) SC_OFFSET(sc, SC_SP) +#define 
SC_CR2(sc) SC_OFFSET(sc, SC_CR2) +#define SC_ERR(sc) SC_OFFSET(sc, SC_ERR) +#define SC_TRAPNO(sc) SC_OFFSET(sc, SC_TRAPNO) +#define SC_CS(sc) SC_OFFSET(sc, SC_CS) +#define SC_FS(sc) SC_OFFSET(sc, SC_FS) +#define SC_GS(sc) SC_OFFSET(sc, SC_GS) +#define SC_EFLAGS(sc) SC_OFFSET(sc, SC_EFLAGS) +#define SC_SIGMASK(sc) SC_OFFSET(sc, SC_SIGMASK) +#define SC_SS(sc) SC_OFFSET(sc, SC_SS) +#if 0 +#define SC_ORIG_RAX(sc) SC_OFFSET(sc, SC_ORIG_RAX) +#define SC_DS(sc) SC_OFFSET(sc, SC_DS) +#define SC_ES(sc) SC_OFFSET(sc, SC_ES) +#endif + +#endif diff --git a/arch/um/sys-x86_64/shared/sysdep/sigcontext.h b/arch/um/sys-x86_64/shared/sysdep/sigcontext.h new file mode 100644 index 0000000..0155133 --- /dev/null +++ b/arch/um/sys-x86_64/shared/sysdep/sigcontext.h @@ -0,0 +1,27 @@ +/* + * Copyright 2003 PathScale, Inc. + * + * Licensed under the GPL + */ + +#ifndef __SYSDEP_X86_64_SIGCONTEXT_H +#define __SYSDEP_X86_64_SIGCONTEXT_H + +#include <sysdep/sc.h> + +#define IP_RESTART_SYSCALL(ip) ((ip) -= 2) + +#define GET_FAULTINFO_FROM_SC(fi, sc) \ + { \ + (fi).cr2 = SC_CR2(sc); \ + (fi).error_code = SC_ERR(sc); \ + (fi).trap_no = SC_TRAPNO(sc); \ + } + +/* This is Page Fault */ +#define SEGV_IS_FIXABLE(fi) ((fi)->trap_no == 14) + +/* No broken SKAS API, which doesn't pass trap_no, here. 
*/ +#define SEGV_MAYBE_FIXABLE(fi) 0 + +#endif diff --git a/arch/um/sys-x86_64/shared/sysdep/skas_ptrace.h b/arch/um/sys-x86_64/shared/sysdep/skas_ptrace.h new file mode 100644 index 0000000..95db4be --- /dev/null +++ b/arch/um/sys-x86_64/shared/sysdep/skas_ptrace.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_X86_64_SKAS_PTRACE_H +#define __SYSDEP_X86_64_SKAS_PTRACE_H + +struct ptrace_faultinfo { + int is_write; + unsigned long addr; +}; + +struct ptrace_ldt { + int func; + void *ptr; + unsigned long bytecount; +}; + +#define PTRACE_LDT 54 + +#endif diff --git a/arch/um/sys-x86_64/shared/sysdep/stub.h b/arch/um/sys-x86_64/shared/sysdep/stub.h new file mode 100644 index 0000000..3432aa2 --- /dev/null +++ b/arch/um/sys-x86_64/shared/sysdep/stub.h @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_STUB_H +#define __SYSDEP_STUB_H + +#include <sys/mman.h> +#include <asm/unistd.h> +#include <sysdep/ptrace_user.h> +#include "as-layout.h" +#include "stub-data.h" +#include "kern_constants.h" + +extern void stub_segv_handler(int sig); +extern void stub_clone_handler(void); + +#define STUB_SYSCALL_RET PT_INDEX(RAX) +#define STUB_MMAP_NR __NR_mmap +#define MMAP_OFFSET(o) (o) + +#define __syscall_clobber "r11","rcx","memory" +#define __syscall "syscall" + +static inline long stub_syscall0(long syscall) +{ + long ret; + + __asm__ volatile (__syscall + : "=a" (ret) + : "0" (syscall) : __syscall_clobber ); + + return ret; +} + +static inline long stub_syscall2(long syscall, long arg1, long arg2) +{ + long ret; + + __asm__ volatile (__syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1), "S" (arg2) : __syscall_clobber ); + + return ret; +} + +static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3) +{ + long ret; + + __asm__ volatile (__syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1), "S" 
(arg2), "d" (arg3) + : __syscall_clobber ); + + return ret; +} + +static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3, + long arg4) +{ + long ret; + + __asm__ volatile ("movq %5,%%r10 ; " __syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3), + "g" (arg4) + : __syscall_clobber, "r10" ); + + return ret; +} + +static inline long stub_syscall5(long syscall, long arg1, long arg2, long arg3, + long arg4, long arg5) +{ + long ret; + + __asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; " __syscall + : "=a" (ret) + : "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3), + "g" (arg4), "g" (arg5) + : __syscall_clobber, "r10", "r8" ); + + return ret; +} + +static inline void trap_myself(void) +{ + __asm("int3"); +} + +static inline void remap_stack(long fd, unsigned long offset) +{ + __asm__ volatile ("movq %4,%%r10 ; movq %5,%%r8 ; " + "movq %6, %%r9; " __syscall "; movq %7, %%rbx ; " + "movq %%rax, (%%rbx)": + : "a" (STUB_MMAP_NR), "D" (STUB_DATA), + "S" (UM_KERN_PAGE_SIZE), + "d" (PROT_READ | PROT_WRITE), + "g" (MAP_FIXED | MAP_SHARED), "g" (fd), + "g" (offset), + "i" (&((struct stub_data *) STUB_DATA)->err) + : __syscall_clobber, "r10", "r8", "r9" ); +} + +#endif diff --git a/arch/um/sys-x86_64/shared/sysdep/syscalls.h b/arch/um/sys-x86_64/shared/sysdep/syscalls.h new file mode 100644 index 0000000..7cfb0b0 --- /dev/null +++ b/arch/um/sys-x86_64/shared/sysdep/syscalls.h @@ -0,0 +1,33 @@ +/* + * Copyright 2003 PathScale, Inc. 
+ * + * Licensed under the GPL + */ + +#ifndef __SYSDEP_X86_64_SYSCALLS_H__ +#define __SYSDEP_X86_64_SYSCALLS_H__ + +#include <linux/msg.h> +#include <linux/shm.h> +#include <kern_constants.h> + +typedef long syscall_handler_t(void); + +extern syscall_handler_t *sys_call_table[]; + +#define EXECUTE_SYSCALL(syscall, regs) \ + (((long (*)(long, long, long, long, long, long)) \ + (*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(&regs->regs), \ + UPT_SYSCALL_ARG2(&regs->regs), \ + UPT_SYSCALL_ARG3(&regs->regs), \ + UPT_SYSCALL_ARG4(&regs->regs), \ + UPT_SYSCALL_ARG5(&regs->regs), \ + UPT_SYSCALL_ARG6(&regs->regs))) + +extern long old_mmap(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff); +extern syscall_handler_t sys_modify_ldt; +extern syscall_handler_t sys_arch_prctl; + +#endif diff --git a/arch/um/sys-x86_64/shared/sysdep/system.h b/arch/um/sys-x86_64/shared/sysdep/system.h new file mode 100644 index 0000000..d1b93c4 --- /dev/null +++ b/arch/um/sys-x86_64/shared/sysdep/system.h @@ -0,0 +1,132 @@ +#ifndef _ASM_X86_SYSTEM_H_ +#define _ASM_X86_SYSTEM_H_ + +#include <asm/asm.h> +#include <asm/segment.h> +#include <asm/cpufeature.h> +#include <asm/cmpxchg.h> +#include <asm/nops.h> + +#include <linux/kernel.h> +#include <linux/irqflags.h> + +/* entries in ARCH_DLINFO: */ +#ifdef CONFIG_IA32_EMULATION +# define AT_VECTOR_SIZE_ARCH 2 +#else +# define AT_VECTOR_SIZE_ARCH 1 +#endif + +extern unsigned long arch_align_stack(unsigned long sp); + +void default_idle(void); + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + */ +#ifdef CONFIG_X86_32 +/* + * Some non-Intel clones support out of order store. wmb() ceases to be a + * nop for these. 
+ */ +#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) +#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) +#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) +#else +#define mb() asm volatile("mfence":::"memory") +#define rmb() asm volatile("lfence":::"memory") +#define wmb() asm volatile("sfence" ::: "memory") +#endif + +/** + * read_barrier_depends - Flush all pending reads that subsequents reads + * depend on. + * + * No data-dependent reads from memory-like regions are ever reordered + * over this barrier. All reads preceding this primitive are guaranteed + * to access memory (but not necessarily other CPUs' caches) before any + * reads following this primitive that depend on the data return by + * any of the preceding reads. This primitive is much lighter weight than + * rmb() on most CPUs, and is never heavier weight than is + * rmb(). + * + * These ordering constraints are respected by both the local CPU + * and the compiler. + * + * Ordering is not guaranteed by anything other than these primitives, + * not even by data dependencies. See the documentation for + * memory_barrier() for examples and URLs to more information. + * + * For example, the following code would force ordering (the initial + * value of "a" is zero, "b" is one, and "p" is "&a"): + * + * <programlisting> + * CPU 0 CPU 1 + * + * b = 2; + * memory_barrier(); + * p = &b; q = p; + * read_barrier_depends(); + * d = *q; + * </programlisting> + * + * because the read of "*q" depends on the read of "p" and these + * two reads are separated by a read_barrier_depends(). 
However, + * the following code, with the same initial values for "a" and "b": + * + * <programlisting> + * CPU 0 CPU 1 + * + * a = 2; + * memory_barrier(); + * b = 3; y = b; + * read_barrier_depends(); + * x = a; + * </programlisting> + * + * does not enforce ordering, since there is no data dependency between + * the read of "a" and the read of "b". Therefore, on some CPUs, such + * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() + * in cases like this where there are no data dependencies. + **/ + +#define read_barrier_depends() do { } while (0) + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#ifdef CONFIG_X86_PPRO_FENCE +# define smp_rmb() rmb() +#else +# define smp_rmb() barrier() +#endif +#ifdef CONFIG_X86_OOSTORE +# define smp_wmb() wmb() +#else +# define smp_wmb() barrier() +#endif +#define smp_read_barrier_depends() read_barrier_depends() +#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() do { } while (0) +#define set_mb(var, value) do { var = value; barrier(); } while (0) +#endif + +/* + * Stop RDTSC speculation. This is needed when you need to use RDTSC + * (or get_cycles or vread that possibly accesses the TSC) in a defined + * code region. + * + * (Could use an alternative three way for this if there was one.) 
+ */ +static inline void rdtsc_barrier(void) +{ + alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); + alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); +} + +#endif diff --git a/arch/um/sys-x86_64/shared/sysdep/tls.h b/arch/um/sys-x86_64/shared/sysdep/tls.h new file mode 100644 index 0000000..18c000d --- /dev/null +++ b/arch/um/sys-x86_64/shared/sysdep/tls.h @@ -0,0 +1,29 @@ +#ifndef _SYSDEP_TLS_H +#define _SYSDEP_TLS_H + +# ifndef __KERNEL__ + +/* Change name to avoid conflicts with the original one from <asm/ldt.h>, which + * may be named user_desc (but in 2.4 and in header matching its API was named + * modify_ldt_ldt_s). */ + +typedef struct um_dup_user_desc { + unsigned int entry_number; + unsigned int base_addr; + unsigned int limit; + unsigned int seg_32bit:1; + unsigned int contents:2; + unsigned int read_exec_only:1; + unsigned int limit_in_pages:1; + unsigned int seg_not_present:1; + unsigned int useable:1; + unsigned int lm:1; +} user_desc_t; + +# else /* __KERNEL__ */ + +# include <ldt.h> +typedef struct user_desc user_desc_t; + +# endif /* __KERNEL__ */ +#endif /* _SYSDEP_TLS_H */ diff --git a/arch/um/sys-x86_64/shared/sysdep/vm-flags.h b/arch/um/sys-x86_64/shared/sysdep/vm-flags.h new file mode 100644 index 0000000..3213edf --- /dev/null +++ b/arch/um/sys-x86_64/shared/sysdep/vm-flags.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Copyright 2003 PathScale, Inc. 
+ * Licensed under the GPL + */ + +#ifndef __VM_FLAGS_X86_64_H +#define __VM_FLAGS_X86_64_H + +#define __VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define __VM_STACK_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | \ + VM_EXEC | VM_MAYREAD | VM_MAYWRITE | \ + VM_MAYEXEC) + +extern unsigned long vm_stack_flags, vm_stack_flags32; +extern unsigned long vm_data_default_flags, vm_data_default_flags32; +extern unsigned long vm_force_exec32; + +#ifdef TIF_IA32 +#define VM_DATA_DEFAULT_FLAGS \ + (test_thread_flag(TIF_IA32) ? vm_data_default_flags32 : \ + vm_data_default_flags) + +#define VM_STACK_DEFAULT_FLAGS \ + (test_thread_flag(TIF_IA32) ? vm_stack_flags32 : vm_stack_flags) +#endif + +#define VM_DATA_DEFAULT_FLAGS vm_data_default_flags + +#define VM_STACK_DEFAULT_FLAGS vm_stack_flags + +#endif diff --git a/arch/um/sys-x86_64/signal.c b/arch/um/sys-x86_64/signal.c new file mode 100644 index 0000000..1a899a7 --- /dev/null +++ b/arch/um/sys-x86_64/signal.c @@ -0,0 +1,291 @@ +/* + * Copyright (C) 2003 PathScale, Inc. 
+ * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <linux/personality.h> +#include <linux/ptrace.h> +#include <asm/unistd.h> +#include <asm/uaccess.h> +#include <asm/ucontext.h> +#include "frame_kern.h" +#include "skas.h" + +void copy_sc(struct uml_pt_regs *regs, void *from) +{ + struct sigcontext *sc = from; + +#define GETREG(regs, regno, sc, regname) \ + (regs)->gp[(regno) / sizeof(unsigned long)] = (sc)->regname + + GETREG(regs, R8, sc, r8); + GETREG(regs, R9, sc, r9); + GETREG(regs, R10, sc, r10); + GETREG(regs, R11, sc, r11); + GETREG(regs, R12, sc, r12); + GETREG(regs, R13, sc, r13); + GETREG(regs, R14, sc, r14); + GETREG(regs, R15, sc, r15); + GETREG(regs, RDI, sc, di); + GETREG(regs, RSI, sc, si); + GETREG(regs, RBP, sc, bp); + GETREG(regs, RBX, sc, bx); + GETREG(regs, RDX, sc, dx); + GETREG(regs, RAX, sc, ax); + GETREG(regs, RCX, sc, cx); + GETREG(regs, RSP, sc, sp); + GETREG(regs, RIP, sc, ip); + GETREG(regs, EFLAGS, sc, flags); + GETREG(regs, CS, sc, cs); + +#undef GETREG +} + +static int copy_sc_from_user(struct pt_regs *regs, + struct sigcontext __user *from, + struct _fpstate __user *fpp) +{ + struct user_i387_struct fp; + int err = 0; + +#define GETREG(regs, regno, sc, regname) \ + __get_user((regs)->regs.gp[(regno) / sizeof(unsigned long)], \ + &(sc)->regname) + + err |= GETREG(regs, R8, from, r8); + err |= GETREG(regs, R9, from, r9); + err |= GETREG(regs, R10, from, r10); + err |= GETREG(regs, R11, from, r11); + err |= GETREG(regs, R12, from, r12); + err |= GETREG(regs, R13, from, r13); + err |= GETREG(regs, R14, from, r14); + err |= GETREG(regs, R15, from, r15); + err |= GETREG(regs, RDI, from, di); + err |= GETREG(regs, RSI, from, si); + err |= GETREG(regs, RBP, from, bp); + err |= GETREG(regs, RBX, from, bx); + err |= GETREG(regs, RDX, from, dx); + err |= GETREG(regs, RAX, from, ax); + err |= GETREG(regs, RCX, from, cx); + err |= GETREG(regs, RSP, from, sp); + err |= GETREG(regs, 
RIP, from, ip); + err |= GETREG(regs, EFLAGS, from, flags); + err |= GETREG(regs, CS, from, cs); + if (err) + return 1; + +#undef GETREG + + err = copy_from_user(&fp, fpp, sizeof(struct user_i387_struct)); + if (err) + return 1; + + err = restore_fp_registers(userspace_pid[current_thread_info()->cpu], + (unsigned long *) &fp); + if (err < 0) { + printk(KERN_ERR "copy_sc_from_user - " + "restore_fp_registers failed, errno = %d\n", + -err); + return 1; + } + + return 0; +} + +static int copy_sc_to_user(struct sigcontext __user *to, + struct _fpstate __user *to_fp, struct pt_regs *regs, + unsigned long mask, unsigned long sp) +{ + struct faultinfo * fi = &current->thread.arch.faultinfo; + struct user_i387_struct fp; + int err = 0; + + err |= __put_user(0, &to->gs); + err |= __put_user(0, &to->fs); + +#define PUTREG(regs, regno, sc, regname) \ + __put_user((regs)->regs.gp[(regno) / sizeof(unsigned long)], \ + &(sc)->regname) + + err |= PUTREG(regs, RDI, to, di); + err |= PUTREG(regs, RSI, to, si); + err |= PUTREG(regs, RBP, to, bp); + /* + * Must use original RSP, which is passed in, rather than what's in + * the pt_regs, because that's already been updated to point at the + * signal frame. 
+ */ + err |= __put_user(sp, &to->sp); + err |= PUTREG(regs, RBX, to, bx); + err |= PUTREG(regs, RDX, to, dx); + err |= PUTREG(regs, RCX, to, cx); + err |= PUTREG(regs, RAX, to, ax); + err |= PUTREG(regs, R8, to, r8); + err |= PUTREG(regs, R9, to, r9); + err |= PUTREG(regs, R10, to, r10); + err |= PUTREG(regs, R11, to, r11); + err |= PUTREG(regs, R12, to, r12); + err |= PUTREG(regs, R13, to, r13); + err |= PUTREG(regs, R14, to, r14); + err |= PUTREG(regs, R15, to, r15); + err |= PUTREG(regs, CS, to, cs); /* XXX x86_64 doesn't do this */ + + err |= __put_user(fi->cr2, &to->cr2); + err |= __put_user(fi->error_code, &to->err); + err |= __put_user(fi->trap_no, &to->trapno); + + err |= PUTREG(regs, RIP, to, ip); + err |= PUTREG(regs, EFLAGS, to, flags); +#undef PUTREG + + err |= __put_user(mask, &to->oldmask); + if (err) + return 1; + + err = save_fp_registers(userspace_pid[current_thread_info()->cpu], + (unsigned long *) &fp); + if (err < 0) { + printk(KERN_ERR "copy_sc_from_user - restore_fp_registers " + "failed, errno = %d\n", -err); + return 1; + } + + if (copy_to_user(to_fp, &fp, sizeof(struct user_i387_struct))) + return 1; + + return err; +} + +struct rt_sigframe +{ + char __user *pretcode; + struct ucontext uc; + struct siginfo info; + struct _fpstate fpstate; +}; + +#define round_down(m, n) (((m) / (n)) * (n)) + +int setup_signal_stack_si(unsigned long stack_top, int sig, + struct k_sigaction *ka, struct pt_regs * regs, + siginfo_t *info, sigset_t *set) +{ + struct rt_sigframe __user *frame; + unsigned long save_sp = PT_REGS_RSP(regs); + int err = 0; + struct task_struct *me = current; + + frame = (struct rt_sigframe __user *) + round_down(stack_top - sizeof(struct rt_sigframe), 16); + /* Subtract 128 for a red zone and 8 for proper alignment */ + frame = (struct rt_sigframe __user *) ((unsigned long) frame - 128 - 8); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto out; + + if (ka->sa.sa_flags & SA_SIGINFO) { + err |= 
copy_siginfo_to_user(&frame->info, info); + if (err) + goto out; + } + + /* + * Update SP now because the page fault handler refuses to extend + * the stack if the faulting address is too far below the current + * SP, which frame now certainly is. If there's an error, the original + * value is restored on the way out. + * When writing the sigcontext to the stack, we have to write the + * original value, so that's passed to copy_sc_to_user, which does + * the right thing with it. + */ + PT_REGS_RSP(regs) = (unsigned long) frame; + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(save_sp), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs, + set->sig[0], save_sp); + err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate); + if (sizeof(*set) == 16) { + __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); + __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); + } + else + err |= __copy_to_user(&frame->uc.uc_sigmask, set, + sizeof(*set)); + + /* + * Set up to return from userspace. If provided, use a stub + * already in userspace. + */ + /* x86-64 should always use SA_RESTORER. 
*/ + if (ka->sa.sa_flags & SA_RESTORER) + err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); + else + /* could use a vstub here */ + goto restore_sp; + + if (err) + goto restore_sp; + + /* Set up registers for signal handler */ + { + struct exec_domain *ed = current_thread_info()->exec_domain; + if (unlikely(ed && ed->signal_invmap && sig < 32)) + sig = ed->signal_invmap[sig]; + } + + PT_REGS_RDI(regs) = sig; + /* In case the signal handler was declared without prototypes */ + PT_REGS_RAX(regs) = 0; + + /* + * This also works for non SA_SIGINFO handlers because they expect the + * next argument after the signal number on the stack. + */ + PT_REGS_RSI(regs) = (unsigned long) &frame->info; + PT_REGS_RDX(regs) = (unsigned long) &frame->uc; + PT_REGS_RIP(regs) = (unsigned long) ka->sa.sa_handler; + out: + return err; + +restore_sp: + PT_REGS_RSP(regs) = save_sp; + return err; +} + +long sys_rt_sigreturn(struct pt_regs *regs) +{ + unsigned long sp = PT_REGS_SP(&current->thread.regs); + struct rt_sigframe __user *frame = + (struct rt_sigframe __user *)(sp - 8); + struct ucontext __user *uc = &frame->uc; + sigset_t set; + + if (copy_from_user(&set, &uc->uc_sigmask, sizeof(set))) + goto segfault; + + sigdelsetmask(&set, ~_BLOCKABLE); + + spin_lock_irq(&current->sighand->siglock); + current->blocked = set; + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); + + if (copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext, + &frame->fpstate)) + goto segfault; + + /* Avoid ERESTART handling */ + PT_REGS_SYSCALL_NR(&current->thread.regs) = -1; + return PT_REGS_SYSCALL_RET(&current->thread.regs); + + segfault: + force_sig(SIGSEGV, current); + return 0; +} diff --git a/arch/um/sys-x86_64/stub.S b/arch/um/sys-x86_64/stub.S new file mode 100644 index 0000000..6d9edf9 --- /dev/null +++ b/arch/um/sys-x86_64/stub.S @@ -0,0 +1,66 @@ +#include "as-layout.h" + + .globl syscall_stub +.section .__syscall_stub, "x" +syscall_stub: + syscall + /* We don't have 64-bit constants, so this constructs the 
address + * we need. + */ + movq $(STUB_DATA >> 32), %rbx + salq $32, %rbx + movq $(STUB_DATA & 0xffffffff), %rcx + or %rcx, %rbx + movq %rax, (%rbx) + int3 + + .globl batch_syscall_stub +batch_syscall_stub: + mov $(STUB_DATA >> 32), %rbx + sal $32, %rbx + mov $(STUB_DATA & 0xffffffff), %rax + or %rax, %rbx + /* load pointer to first operation */ + mov %rbx, %rsp + add $0x10, %rsp +again: + /* load length of additional data */ + mov 0x0(%rsp), %rax + + /* if(length == 0) : end of list */ + /* write possible 0 to header */ + mov %rax, 8(%rbx) + cmp $0, %rax + jz done + + /* save current pointer */ + mov %rsp, 8(%rbx) + + /* skip additional data */ + add %rax, %rsp + + /* load syscall-# */ + pop %rax + + /* load syscall params */ + pop %rdi + pop %rsi + pop %rdx + pop %r10 + pop %r8 + pop %r9 + + /* execute syscall */ + syscall + + /* check return value */ + pop %rcx + cmp %rcx, %rax + je again + +done: + /* save return value */ + mov %rax, (%rbx) + + /* stop */ + int3 diff --git a/arch/um/sys-x86_64/stub_segv.c b/arch/um/sys-x86_64/stub_segv.c new file mode 100644 index 0000000..ced051a --- /dev/null +++ b/arch/um/sys-x86_64/stub_segv.c @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <signal.h> +#include "as-layout.h" +#include "sysdep/stub.h" +#include "sysdep/faultinfo.h" +#include "sysdep/sigcontext.h" + +void __attribute__ ((__section__ (".__syscall_stub"))) +stub_segv_handler(int sig) +{ + struct ucontext *uc; + + __asm__ __volatile__("movq %%rdx, %0" : "=g" (uc) :); + GET_FAULTINFO_FROM_SC(*((struct faultinfo *) STUB_DATA), + &uc->uc_mcontext); + trap_myself(); +} + diff --git a/arch/um/sys-x86_64/syscall_table.c b/arch/um/sys-x86_64/syscall_table.c new file mode 100644 index 0000000..dd21d69 --- /dev/null +++ b/arch/um/sys-x86_64/syscall_table.c @@ -0,0 +1,70 @@ +/* + * System call table for UML/x86-64, copied from arch/x86_64/kernel/syscall.c + * with some changes for 
UML. + */ + +#include <linux/linkage.h> +#include <linux/sys.h> +#include <linux/cache.h> +#include <kern_constants.h> + +#define __NO_STUBS + +/* + * Below you can see, in terms of #define's, the differences between the x86-64 + * and the UML syscall table. + */ + +/* Not going to be implemented by UML, since we have no hardware. */ +#define stub_iopl sys_ni_syscall +#define sys_ioperm sys_ni_syscall + +/* + * The UML TLS problem. Note that x86_64 does not implement this, so the below + * is needed only for the ia32 compatibility. + */ + +/* On UML we call it this way ("old" means it's not mmap2) */ +#define sys_mmap old_mmap +/* + * On x86-64 sys_uname is actually sys_newuname plus a compatibility trick. + * See arch/x86_64/kernel/sys_x86_64.c + */ +#define sys_uname sys_uname64 + +#define stub_clone sys_clone +#define stub_fork sys_fork +#define stub_vfork sys_vfork +#define stub_execve sys_execve +#define stub_rt_sigsuspend sys_rt_sigsuspend +#define stub_sigaltstack sys_sigaltstack +#define stub_rt_sigreturn sys_rt_sigreturn + +#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; +#undef _ASM_X86_UNISTD_64_H +#include "../../x86/include/asm/unistd_64.h" + +#undef __SYSCALL +#define __SYSCALL(nr, sym) [ nr ] = sym, +#undef _ASM_X86_UNISTD_64_H + +typedef void (*sys_call_ptr_t)(void); + +extern void sys_ni_syscall(void); + +/* + * We used to have a trick here which made sure that holes in the + * x86_64 table were filled in with sys_ni_syscall, but a comment in + * unistd_64.h says that holes aren't allowed, so the trick was + * removed. + * The trick looked like this + * [0 ... UM_NR_syscall_max] = &sys_ni_syscall + * before including unistd_64.h - the later initializations overwrote + * the sys_ni_syscall filler. 
+ */ + +sys_call_ptr_t sys_call_table[] __cacheline_aligned = { +#include "../../x86/include/asm/unistd_64.h" +}; + +int syscall_table_size = sizeof(sys_call_table); diff --git a/arch/um/sys-x86_64/syscalls.c b/arch/um/sys-x86_64/syscalls.c new file mode 100644 index 0000000..f1199fd --- /dev/null +++ b/arch/um/sys-x86_64/syscalls.c @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright 2003 PathScale, Inc. + * + * Licensed under the GPL + */ + +#include "linux/linkage.h" +#include "linux/personality.h" +#include "linux/utsname.h" +#include "asm/prctl.h" /* XXX This should get the constants from libc */ +#include "asm/uaccess.h" +#include "os.h" + +asmlinkage long sys_uname64(struct new_utsname __user * name) +{ + int err; + + down_read(&uts_sem); + err = copy_to_user(name, utsname(), sizeof (*name)); + up_read(&uts_sem); + + if (personality(current->personality) == PER_LINUX32) + err |= copy_to_user(&name->machine, "i686", 5); + + return err ? -EFAULT : 0; +} + +long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) +{ + unsigned long *ptr = addr, tmp; + long ret; + int pid = task->mm->context.id.u.pid; + + /* + * With ARCH_SET_FS (and ARCH_SET_GS is treated similarly to + * be safe), we need to call arch_prctl on the host because + * setting %fs may result in something else happening (like a + * GDT or thread.fs being set instead). So, we let the host + * fiddle the registers and thread struct and restore the + * registers afterwards. + * + * So, the saved registers are stored to the process (this + * needed because a stub may have been the last thing to run), + * arch_prctl is run on the host, then the registers are read + * back. 
+ */ + switch (code) { + case ARCH_SET_FS: + case ARCH_SET_GS: + ret = restore_registers(pid, &current->thread.regs.regs); + if (ret) + return ret; + break; + case ARCH_GET_FS: + case ARCH_GET_GS: + /* + * With these two, we read to a local pointer and + * put_user it to the userspace pointer that we were + * given. If addr isn't valid (because it hasn't been + * faulted in or is just bogus), we want put_user to + * fault it in (or return -EFAULT) instead of having + * the host return -EFAULT. + */ + ptr = &tmp; + } + + ret = os_arch_prctl(pid, code, ptr); + if (ret) + return ret; + + switch (code) { + case ARCH_SET_FS: + current->thread.arch.fs = (unsigned long) ptr; + ret = save_registers(pid, &current->thread.regs.regs); + break; + case ARCH_SET_GS: + ret = save_registers(pid, &current->thread.regs.regs); + break; + case ARCH_GET_FS: + ret = put_user(tmp, addr); + break; + case ARCH_GET_GS: + ret = put_user(tmp, addr); + break; + } + + return ret; +} + +long sys_arch_prctl(int code, unsigned long addr) +{ + return arch_prctl(current, code, (unsigned long __user *) addr); +} + +long sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid) +{ + long ret; + + if (!newsp) + newsp = UPT_SP(&current->thread.regs.regs); + current->thread.forking = 1; + ret = do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid, + child_tid); + current->thread.forking = 0; + return ret; +} + +void arch_switch_to(struct task_struct *to) +{ + if ((to->thread.arch.fs == 0) || (to->mm == NULL)) + return; + + arch_prctl(to, ARCH_SET_FS, (void __user *) to->thread.arch.fs); +} diff --git a/arch/um/sys-x86_64/sysrq.c b/arch/um/sys-x86_64/sysrq.c new file mode 100644 index 0000000..f4f82be --- /dev/null +++ b/arch/um/sys-x86_64/sysrq.c @@ -0,0 +1,41 @@ +/* + * Copyright 2003 PathScale, Inc. 
+ * + * Licensed under the GPL + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/utsname.h> +#include <asm/current.h> +#include <asm/ptrace.h> +#include "sysrq.h" + +void __show_regs(struct pt_regs *regs) +{ + printk("\n"); + print_modules(); + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s\n", task_pid_nr(current), + current->comm, print_tainted(), init_utsname()->release); + printk(KERN_INFO "RIP: %04lx:[<%016lx>]\n", PT_REGS_CS(regs) & 0xffff, + PT_REGS_RIP(regs)); + printk(KERN_INFO "RSP: %016lx EFLAGS: %08lx\n", PT_REGS_RSP(regs), + PT_REGS_EFLAGS(regs)); + printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", + PT_REGS_RAX(regs), PT_REGS_RBX(regs), PT_REGS_RCX(regs)); + printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", + PT_REGS_RDX(regs), PT_REGS_RSI(regs), PT_REGS_RDI(regs)); + printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", + PT_REGS_RBP(regs), PT_REGS_R8(regs), PT_REGS_R9(regs)); + printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", + PT_REGS_R10(regs), PT_REGS_R11(regs), PT_REGS_R12(regs)); + printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", + PT_REGS_R13(regs), PT_REGS_R14(regs), PT_REGS_R15(regs)); +} + +void show_regs(struct pt_regs *regs) +{ + __show_regs(regs); + show_trace(current, (unsigned long *) &regs); +} diff --git a/arch/um/sys-x86_64/tls.c b/arch/um/sys-x86_64/tls.c new file mode 100644 index 0000000..f7ba462 --- /dev/null +++ b/arch/um/sys-x86_64/tls.c @@ -0,0 +1,17 @@ +#include "linux/sched.h" + +void clear_flushed_tls(struct task_struct *task) +{ +} + +int arch_copy_tls(struct task_struct *t) +{ + /* + * If CLONE_SETTLS is set, we need to save the thread id + * (which is argument 5, child_tid, of clone) so it can be set + * during context switches. 
+ */ + t->thread.arch.fs = t->thread.regs.regs.gp[R8 / sizeof(long)]; + + return 0; +} diff --git a/arch/um/sys-x86_64/um_module.c b/arch/um/sys-x86_64/um_module.c new file mode 100644 index 0000000..3dead39 --- /dev/null +++ b/arch/um/sys-x86_64/um_module.c @@ -0,0 +1,21 @@ +#include <linux/vmalloc.h> +#include <linux/moduleloader.h> + +/* Copied from i386 arch/i386/kernel/module.c */ +void *module_alloc(unsigned long size) +{ + if (size == 0) + return NULL; + return vmalloc_exec(size); +} + +/* Free memory returned from module_alloc */ +void module_free(struct module *mod, void *module_region) +{ + vfree(module_region); + /* + * FIXME: If module_region == mod->init_region, trim exception + * table entries. + */ +} + diff --git a/arch/um/sys-x86_64/user-offsets.c b/arch/um/sys-x86_64/user-offsets.c new file mode 100644 index 0000000..9735854 --- /dev/null +++ b/arch/um/sys-x86_64/user-offsets.c @@ -0,0 +1,65 @@ +#include <stdio.h> +#include <stddef.h> +#include <signal.h> +#include <sys/poll.h> +#include <sys/mman.h> +#include <sys/user.h> +#define __FRAME_OFFSETS +#include <asm/ptrace.h> +#include <asm/types.h> + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define DEFINE_LONGS(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long))) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)); + +void foo(void) +{ + OFFSET(HOST_SC_CR2, sigcontext, cr2); + OFFSET(HOST_SC_ERR, sigcontext, err); + OFFSET(HOST_SC_TRAPNO, sigcontext, trapno); + + DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long)); + DEFINE_LONGS(HOST_RBX, RBX); + DEFINE_LONGS(HOST_RCX, RCX); + DEFINE_LONGS(HOST_RDI, RDI); + DEFINE_LONGS(HOST_RSI, RSI); + DEFINE_LONGS(HOST_RDX, RDX); + DEFINE_LONGS(HOST_RBP, RBP); + DEFINE_LONGS(HOST_RAX, RAX); + DEFINE_LONGS(HOST_R8, R8); + DEFINE_LONGS(HOST_R9, R9); + DEFINE_LONGS(HOST_R10, R10); + DEFINE_LONGS(HOST_R11, R11); + DEFINE_LONGS(HOST_R12, R12); + 
DEFINE_LONGS(HOST_R13, R13); + DEFINE_LONGS(HOST_R14, R14); + DEFINE_LONGS(HOST_R15, R15); + DEFINE_LONGS(HOST_ORIG_RAX, ORIG_RAX); + DEFINE_LONGS(HOST_CS, CS); + DEFINE_LONGS(HOST_SS, SS); + DEFINE_LONGS(HOST_EFLAGS, EFLAGS); +#if 0 + DEFINE_LONGS(HOST_FS, FS); + DEFINE_LONGS(HOST_GS, GS); + DEFINE_LONGS(HOST_DS, DS); + DEFINE_LONGS(HOST_ES, ES); +#endif + + DEFINE_LONGS(HOST_IP, RIP); + DEFINE_LONGS(HOST_SP, RSP); + DEFINE(UM_FRAME_SIZE, sizeof(struct user_regs_struct)); + + /* XXX Duplicated between i386 and x86_64 */ + DEFINE(UM_POLLIN, POLLIN); + DEFINE(UM_POLLPRI, POLLPRI); + DEFINE(UM_POLLOUT, POLLOUT); + + DEFINE(UM_PROT_READ, PROT_READ); + DEFINE(UM_PROT_WRITE, PROT_WRITE); + DEFINE(UM_PROT_EXEC, PROT_EXEC); +} |