From fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204 Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Wed, 23 Aug 2017 14:45:25 -0500 Subject: Initial import of modified Linux 2.6.28 tree Original upstream URL: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git | branch linux-2.6.28.y --- init/Kconfig | 987 +++++++++++++++++++++++++++++++++++++++++++++++ init/Makefile | 36 ++ init/calibrate.c | 176 +++++++++ init/do_mounts.c | 412 ++++++++++++++++++++ init/do_mounts.h | 75 ++++ init/do_mounts_initrd.c | 129 +++++++ init/do_mounts_md.c | 302 +++++++++++++++ init/do_mounts_rd.c | 358 +++++++++++++++++ init/do_mounts_rd.c.orig | 344 +++++++++++++++++ init/initramfs.c | 576 +++++++++++++++++++++++++++ init/main.c | 888 ++++++++++++++++++++++++++++++++++++++++++ init/noinitramfs.c | 52 +++ init/version.c | 47 +++ 13 files changed, 4382 insertions(+) create mode 100644 init/Kconfig create mode 100644 init/Makefile create mode 100644 init/calibrate.c create mode 100644 init/do_mounts.c create mode 100644 init/do_mounts.h create mode 100644 init/do_mounts_initrd.c create mode 100644 init/do_mounts_md.c create mode 100644 init/do_mounts_rd.c create mode 100644 init/do_mounts_rd.c.orig create mode 100644 init/initramfs.c create mode 100644 init/main.c create mode 100644 init/noinitramfs.c create mode 100644 init/version.c (limited to 'init') diff --git a/init/Kconfig b/init/Kconfig new file mode 100644 index 0000000..d7f95fe --- /dev/null +++ b/init/Kconfig @@ -0,0 +1,987 @@ +config ARCH + string + option env="ARCH" + +config KERNELVERSION + string + option env="KERNELVERSION" + +config DEFCONFIG_LIST + string + depends on !UML + option defconfig_list + default "/lib/modules/$UNAME_RELEASE/.config" + default "/etc/kernel-config" + default "/boot/config-$UNAME_RELEASE" + default "$ARCH_DEFCONFIG" + default "arch/$ARCH/defconfig" + +menu "General setup" + +config EXPERIMENTAL + bool "Prompt for development and/or incomplete code/drivers" + ---help--- + Some of the various things that Linux supports (such as network + drivers, file systems, network protocols, etc.) can be in a state + of development where the functionality, stability, or the level of + testing is not yet high enough for general use. This is usually + known as the "alpha-test" phase among developers. If a feature is + currently in alpha-test, then the developers usually discourage + uninformed widespread use of this feature by the general public to + avoid "Why doesn't this work?" type mail messages. However, active + testing and use of these systems is welcomed. Just be aware that it + may not meet the normal level of reliability or it may fail to work + in some special cases. Detailed bug reports from people familiar + with the kernel internals are usually welcomed by the developers + (before submitting bug reports, please read the documents + , , , + , and + in the kernel source). + + This option will also make obsoleted drivers available. These are + drivers that have been replaced by something else, and/or are + scheduled to be removed in a future kernel release. + + Unless you intend to help test and develop a feature or driver that + falls into this category, or you have a situation that requires + using these features, you should probably say N here, which will + cause the configurator to present you with fewer choices. If + you say Y here, you will be offered the choice of using features or + drivers that are currently considered to be in the alpha-test phase. + +config BROKEN + bool + +config BROKEN_ON_SMP + bool + depends on BROKEN || !SMP + default y + +config LOCK_KERNEL + bool + depends on SMP || PREEMPT + default y + +config INIT_ENV_ARG_LIMIT + int + default 32 if !UML + default 128 if UML + help + Maximum of each of the number of arguments and environment + variables passed to init from the kernel command line. + + +config LOCALVERSION + string "Local version - append to kernel release" + help + Append an extra string to the end of your kernel version. + This will show up when you type uname, for example. + The string you set here will be appended after the contents of + any files with a filename matching localversion* in your + object and source tree, in that order. Your total string can + be a maximum of 64 characters. + +config LOCALVERSION_AUTO + bool "Automatically append version information to the version string" + default y + help + This will try to automatically determine if the current tree is a + release tree by looking for git tags that belong to the current + top of tree revision. + + A string of the format -gxxxxxxxx will be added to the localversion + if a git-based tree is found. The string generated by this will be + appended after any matching localversion* files, and after the value + set in CONFIG_LOCALVERSION. + + (The actual string used here is the first eight characters produced + by running the command: + + $ git rev-parse --verify HEAD + + which is done within the script "scripts/setlocalversion".) + +choice + prompt "Kernel compression mode" + default KERNEL_GZIP + help + The linux kernel is a kind of self-extracting executable. + Several compression algorithms are available, which differ + in efficiency, compression and decompression speed. + Compression speed is only relevant when building a kernel. + Decompression speed is relevant at each boot. + + If you have any problems with bzip2 or lzma compressed + kernels, mail me (Alain Knaff) . (An older + version of this functionality (bzip2 only), for 2.4, was + supplied by Christian Ludwig) + + High compression options are mostly useful for users, who + are low on disk space (embedded systems), but for whom ram + size matters less. + + If in doubt, select 'gzip' + +config KERNEL_GZIP + bool "Gzip" + help + The old and tried gzip compression. Its compression ratio is + the poorest among the 3 choices; however its speed (both + compression and decompression) is the fastest. + +config KERNEL_BZIP2 + bool "Bzip2" + help + Its compression ratio and speed is intermediate. + Decompression speed is slowest among the 3. + The kernel size is about 10 per cent smaller with bzip2, + in comparison to gzip. + Bzip2 uses a large amount of memory. For modern kernels + you will need at least 8MB RAM or more for booting. + +config KERNEL_LZMA + bool "LZMA" + help + The most recent compression algorithm. + Its ratio is best, decompression speed is between the other + 2. Compression is slowest. + The kernel size is about 33 per cent smaller with lzma, + in comparison to gzip. + +endchoice + + +config SWAP + bool "Support for paging of anonymous memory (swap)" + depends on MMU && BLOCK + default y + help + This option allows you to choose whether you want to have support + for so called swap devices or swap files in your kernel that are + used to provide more virtual memory than the actual RAM present + in your computer. If unsure say Y. + +config SYSVIPC + bool "System V IPC" + ---help--- + Inter Process Communication is a suite of library functions and + system calls which let processes (running programs) synchronize and + exchange information. It is generally considered to be a good thing, + and some programs won't run unless you say Y here. In particular, if + you want to run the DOS emulator dosemu under Linux (read the + DOSEMU-HOWTO, available from ), + you'll need to say Y here. + + You can find documentation about IPC with "info ipc" and also in + section 6.4 of the Linux Programmer's Guide, available from + . + +config SYSVIPC_SYSCTL + bool + depends on SYSVIPC + depends on SYSCTL + default y + +config POSIX_MQUEUE + bool "POSIX Message Queues" + depends on NET && EXPERIMENTAL + ---help--- + POSIX variant of message queues is a part of IPC. In POSIX message + queues every message has a priority which decides about succession + of receiving it by a process. If you want to compile and run + programs written e.g. for Solaris with use of its POSIX message + queues (functions mq_*) say Y here. + + POSIX message queues are visible as a filesystem called 'mqueue' + and can be mounted somewhere if you want to do filesystem + operations on message queues. + + If unsure, say Y. + +config BSD_PROCESS_ACCT + bool "BSD Process Accounting" + help + If you say Y here, a user level program will be able to instruct the + kernel (via a special system call) to write process accounting + information to a file: whenever a process exits, information about + that process will be appended to the file by the kernel. The + information includes things such as creation time, owning user, + command name, memory usage, controlling terminal etc. (the complete + list is in the struct acct in ). It is + up to the user level program to do useful things with this + information. This is generally a good idea, so say Y. + +config BSD_PROCESS_ACCT_V3 + bool "BSD Process Accounting version 3 file format" + depends on BSD_PROCESS_ACCT + default n + help + If you say Y here, the process accounting information is written + in a new file format that also logs the process IDs of each + process and it's parent. Note that this file format is incompatible + with previous v0/v1/v2 file formats, so you will need updated tools + for processing it. A preliminary version of these tools is available + at . + +config TASKSTATS + bool "Export task/process statistics through netlink (EXPERIMENTAL)" + depends on NET + default n + help + Export selected statistics for tasks/processes through the + generic netlink interface. Unlike BSD process accounting, the + statistics are available during the lifetime of tasks/processes as + responses to commands. Like BSD accounting, they are sent to user + space on task exit. + + Say N if unsure. + +config TASK_DELAY_ACCT + bool "Enable per-task delay accounting (EXPERIMENTAL)" + depends on TASKSTATS + help + Collect information on time spent by a task waiting for system + resources like cpu, synchronous block I/O completion and swapping + in pages. Such statistics can help in setting a task's priorities + relative to other tasks for cpu, io, rss limits etc. + + Say N if unsure. + +config TASK_XACCT + bool "Enable extended accounting over taskstats (EXPERIMENTAL)" + depends on TASKSTATS + help + Collect extended task accounting data and send the data + to userland for processing over the taskstats interface. + + Say N if unsure. + +config TASK_IO_ACCOUNTING + bool "Enable per-task storage I/O accounting (EXPERIMENTAL)" + depends on TASK_XACCT + help + Collect information on the number of bytes of storage I/O which this + task has caused. + + Say N if unsure. + +config AUDIT + bool "Auditing support" + depends on NET + help + Enable auditing infrastructure that can be used with another + kernel subsystem, such as SELinux (which requires this for + logging of avc messages output). Does not do system-call + auditing without CONFIG_AUDITSYSCALL. + +config AUDITSYSCALL + bool "Enable system-call auditing support" + depends on AUDIT && (X86 || PPC || PPC64 || S390 || IA64 || UML || SPARC64|| SUPERH) + default y if SECURITY_SELINUX + help + Enable low-overhead system-call auditing infrastructure that + can be used independently or with another kernel subsystem, + such as SELinux. To use audit's filesystem watch feature, please + ensure that INOTIFY is configured. + +config AUDIT_TREE + def_bool y + depends on AUDITSYSCALL && INOTIFY + +config IKCONFIG + tristate "Kernel .config support" + ---help--- + This option enables the complete Linux kernel ".config" file + contents to be saved in the kernel. It provides documentation + of which kernel options are used in a running kernel or in an + on-disk kernel. This information can be extracted from the kernel + image file with the script scripts/extract-ikconfig and used as + input to rebuild the current kernel or to build another kernel. + It can also be extracted from a running kernel by reading + /proc/config.gz if enabled (below). + +config IKCONFIG_PROC + bool "Enable access to .config through /proc/config.gz" + depends on IKCONFIG && PROC_FS + ---help--- + This option enables access to the kernel configuration file + through /proc/config.gz. + +config LOG_BUF_SHIFT + int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" + range 12 21 + default 17 + help + Select kernel log buffer size as a power of 2. + Examples: + 17 => 128 KB + 16 => 64 KB + 15 => 32 KB + 14 => 16 KB + 13 => 8 KB + 12 => 4 KB + +config CGROUPS + bool "Control Group support" + help + This option will let you use process cgroup subsystems + such as Cpusets + + Say N if unsure. + +config CGROUP_DEBUG + bool "Example debug cgroup subsystem" + depends on CGROUPS + default n + help + This option enables a simple cgroup subsystem that + exports useful debugging information about the cgroups + framework + + Say N if unsure + +config CGROUP_NS + bool "Namespace cgroup subsystem" + depends on CGROUPS + help + Provides a simple namespace cgroup subsystem to + provide hierarchical naming of sets of namespaces, + for instance virtual servers and checkpoint/restart + jobs. + +config CGROUP_FREEZER + bool "control group freezer subsystem" + depends on CGROUPS + help + Provides a way to freeze and unfreeze all tasks in a + cgroup. + +config CGROUP_DEVICE + bool "Device controller for cgroups" + depends on CGROUPS && EXPERIMENTAL + help + Provides a cgroup implementing whitelists for devices which + a process in the cgroup can mknod or open. + +config CPUSETS + bool "Cpuset support" + depends on SMP && CGROUPS + help + This option will let you create and manage CPUSETs which + allow dynamically partitioning a system into sets of CPUs and + Memory Nodes and assigning tasks to run only within those sets. + This is primarily useful on large SMP or NUMA systems. + + Say N if unsure. + +# +# Architectures with an unreliable sched_clock() should select this: +# +config HAVE_UNSTABLE_SCHED_CLOCK + bool + +config GROUP_SCHED + bool "Group CPU scheduler" + depends on EXPERIMENTAL + default n + help + This feature lets CPU scheduler recognize task groups and control CPU + bandwidth allocation to such task groups. + +config FAIR_GROUP_SCHED + bool "Group scheduling for SCHED_OTHER" + depends on GROUP_SCHED + default GROUP_SCHED + +config RT_GROUP_SCHED + bool "Group scheduling for SCHED_RR/FIFO" + depends on EXPERIMENTAL + depends on GROUP_SCHED + default n + help + This feature lets you explicitly allocate real CPU bandwidth + to users or control groups (depending on the "Basis for grouping tasks" + setting below. If enabled, it will also make it impossible to + schedule realtime tasks for non-root users until you allocate + realtime bandwidth for them. + See Documentation/scheduler/sched-rt-group.txt for more information. + +choice + depends on GROUP_SCHED + prompt "Basis for grouping tasks" + default USER_SCHED + +config USER_SCHED + bool "user id" + help + This option will choose userid as the basis for grouping + tasks, thus providing equal CPU bandwidth to each user. + +config CGROUP_SCHED + bool "Control groups" + depends on CGROUPS + help + This option allows you to create arbitrary task groups + using the "cgroup" pseudo filesystem and control + the cpu bandwidth allocated to each such task group. + Refer to Documentation/cgroups.txt for more information + on "cgroup" pseudo filesystem. + +endchoice + +config CGROUP_CPUACCT + bool "Simple CPU accounting cgroup subsystem" + depends on CGROUPS + help + Provides a simple Resource Controller for monitoring the + total CPU consumed by the tasks in a cgroup + +config RESOURCE_COUNTERS + bool "Resource counters" + help + This option enables controller independent resource accounting + infrastructure that works with cgroups + depends on CGROUPS + +config MM_OWNER + bool + +config CGROUP_MEM_RES_CTLR + bool "Memory Resource Controller for Control Groups" + depends on CGROUPS && RESOURCE_COUNTERS + select MM_OWNER + help + Provides a memory resource controller that manages both anonymous + memory and page cache. (See Documentation/controllers/memory.txt) + + Note that setting this option increases fixed memory overhead + associated with each page of memory in the system. By this, + 20(40)bytes/PAGE_SIZE on 32(64)bit system will be occupied by memory + usage tracking struct at boot. Total amount of this is printed out + at boot. + + Only enable when you're ok with these trade offs and really + sure you need the memory resource controller. Even when you enable + this, you can set "cgroup_disable=memory" at your boot option to + disable memory resource controller and you can avoid overheads. + (and lose benefits of memory resource contoller) + + This config option also selects MM_OWNER config option, which + could in turn add some fork/exit overhead. + +config SYSFS_DEPRECATED + bool + +config SYSFS_DEPRECATED_V2 + bool "Create deprecated sysfs files" + depends on SYSFS + default y + select SYSFS_DEPRECATED + help + This option creates deprecated symlinks such as the + "device"-link, the :-link, and the + "bus"-link. It may also add deprecated key in the + uevent environment. + None of these features or values should be used today, as + they export driver core implementation details to userspace + or export properties which can't be kept stable across kernel + releases. + + If enabled, this option will also move any device structures + that belong to a class, back into the /sys/class hierarchy, in + order to support older versions of udev and some userspace + programs. + + If you are using a distro with the most recent userspace + packages, it should be safe to say N here. + +config PROC_PID_CPUSET + bool "Include legacy /proc//cpuset file" + depends on CPUSETS + default y + +config RELAY + bool "Kernel->user space relay support (formerly relayfs)" + help + This option enables support for relay interface support in + certain file systems (such as debugfs). + It is designed to provide an efficient mechanism for tools and + facilities to relay large amounts of data from kernel space to + user space. + + If unsure, say N. + +config NAMESPACES + bool "Namespaces support" if EMBEDDED + default !EMBEDDED + help + Provides the way to make tasks work with different objects using + the same id. For example same IPC id may refer to different objects + or same user id or pid may refer to different tasks when used in + different namespaces. + +config UTS_NS + bool "UTS namespace" + depends on NAMESPACES + help + In this namespace tasks see different info provided with the + uname() system call + +config IPC_NS + bool "IPC namespace" + depends on NAMESPACES && SYSVIPC + help + In this namespace tasks work with IPC ids which correspond to + different IPC objects in different namespaces + +config USER_NS + bool "User namespace (EXPERIMENTAL)" + depends on NAMESPACES && EXPERIMENTAL + help + This allows containers, i.e. vservers, to use user namespaces + to provide different user info for different servers. + If unsure, say N. + +config PID_NS + bool "PID Namespaces (EXPERIMENTAL)" + default n + depends on NAMESPACES && EXPERIMENTAL + help + Support process id namespaces. This allows having multiple + process with the same pid as long as they are in different + pid namespaces. This is a building block of containers. + + Unless you want to work with an experimental feature + say N here. + +config BLK_DEV_INITRD + bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support" + depends on BROKEN || !FRV + help + The initial RAM filesystem is a ramfs which is loaded by the + boot loader (loadlin or lilo) and that is mounted as root + before the normal boot procedure. It is typically used to + load modules needed to mount the "real" root file system, + etc. See for details. + + If RAM disk support (BLK_DEV_RAM) is also included, this + also enables initial RAM disk (initrd) support and adds + 15 Kbytes (more on some other architectures) to the kernel size. + + If unsure say Y. + +if BLK_DEV_INITRD + +source "usr/Kconfig" + +endif + +config CC_OPTIMIZE_FOR_SIZE + bool "Optimize for size" + default y + help + Enabling this option will pass "-Os" instead of "-O2" to gcc + resulting in a smaller kernel. + + If unsure, say Y. + +config SYSCTL + bool + +config ANON_INODES + bool + +menuconfig EMBEDDED + bool "Configure standard kernel features (for small systems)" + help + This option allows certain base kernel options and settings + to be disabled or tweaked. This is for specialized + environments which can tolerate a "non-standard" kernel. + Only use this if you really know what you are doing. + +config UID16 + bool "Enable 16-bit UID system calls" if EMBEDDED + depends on ARM || BLACKFIN || CRIS || FRV || H8300 || X86_32 || M68K || (S390 && !64BIT) || SUPERH || SPARC32 || (SPARC64 && COMPAT) || UML || (X86_64 && IA32_EMULATION) + default y + help + This enables the legacy 16-bit UID syscall wrappers. + +config SYSCTL_SYSCALL + bool "Sysctl syscall support" if EMBEDDED + default y + select SYSCTL + ---help--- + sys_sysctl uses binary paths that have been found challenging + to properly maintain and use. The interface in /proc/sys + using paths with ascii names is now the primary path to this + information. + + Almost nothing using the binary sysctl interface so if you are + trying to save some space it is probably safe to disable this, + making your kernel marginally smaller. + + If unsure say Y here. + +config KALLSYMS + bool "Load all symbols for debugging/ksymoops" if EMBEDDED + default y + help + Say Y here to let the kernel print out symbolic crash information and + symbolic stack backtraces. This increases the size of the kernel + somewhat, as all symbols have to be loaded into the kernel image. + +config KALLSYMS_ALL + bool "Include all symbols in kallsyms" + depends on DEBUG_KERNEL && KALLSYMS + help + Normally kallsyms only contains the symbols of functions, for nicer + OOPS messages. Some debuggers can use kallsyms for other + symbols too: say Y here to include all symbols, if you need them + and you don't care about adding 300k to the size of your kernel. + + Say N. + +config KALLSYMS_EXTRA_PASS + bool "Do an extra kallsyms pass" + depends on KALLSYMS + help + If kallsyms is not working correctly, the build will fail with + inconsistent kallsyms data. If that occurs, log a bug report and + turn on KALLSYMS_EXTRA_PASS which should result in a stable build. + Always say N here unless you find a bug in kallsyms, which must be + reported. KALLSYMS_EXTRA_PASS is only a temporary workaround while + you wait for kallsyms to be fixed. + + +config HOTPLUG + bool "Support for hot-pluggable devices" if EMBEDDED + default y + help + This option is provided for the case where no hotplug or uevent + capabilities is wanted by the kernel. You should only consider + disabling this option for embedded systems that do not use modules, a + dynamic /dev tree, or dynamic device discovery. Just say Y. + +config PRINTK + default y + bool "Enable support for printk" if EMBEDDED + help + This option enables normal printk support. Removing it + eliminates most of the message strings from the kernel image + and makes the kernel more or less silent. As this makes it + very difficult to diagnose system problems, saying N here is + strongly discouraged. + +config BUG + bool "BUG() support" if EMBEDDED + default y + help + Disabling this option eliminates support for BUG and WARN, reducing + the size of your kernel image and potentially quietly ignoring + numerous fatal conditions. You should only consider disabling this + option for embedded systems with no facilities for reporting errors. + Just say Y. + +config ELF_CORE + default y + bool "Enable ELF core dumps" if EMBEDDED + help + Enable support for generating core dumps. Disabling saves about 4k. + +config PCSPKR_PLATFORM + bool "Enable PC-Speaker support" if EMBEDDED + depends on ALPHA || X86 || MIPS || PPC_PREP || PPC_CHRP || PPC_PSERIES + default y + help + This option allows to disable the internal PC-Speaker + support, saving some memory. + +config BASE_FULL + default y + bool "Enable full-sized data structures for core" if EMBEDDED + help + Disabling this option reduces the size of miscellaneous core + kernel data structures. This saves memory on small machines, + but may reduce performance. + +config FUTEX + bool "Enable futex support" if EMBEDDED + default y + select RT_MUTEXES + help + Disabling this option will cause the kernel to be built without + support for "fast userspace mutexes". The resulting kernel may not + run glibc-based applications correctly. + +config EPOLL + bool "Enable eventpoll support" if EMBEDDED + default y + select ANON_INODES + help + Disabling this option will cause the kernel to be built without + support for epoll family of system calls. + +config SIGNALFD + bool "Enable signalfd() system call" if EMBEDDED + select ANON_INODES + default y + help + Enable the signalfd() system call that allows to receive signals + on a file descriptor. + + If unsure, say Y. + +config TIMERFD + bool "Enable timerfd() system call" if EMBEDDED + select ANON_INODES + default y + help + Enable the timerfd() system call that allows to receive timer + events on a file descriptor. + + If unsure, say Y. + +config EVENTFD + bool "Enable eventfd() system call" if EMBEDDED + select ANON_INODES + default y + help + Enable the eventfd() system call that allows to receive both + kernel notification (ie. KAIO) or userspace notifications. + + If unsure, say Y. + +config SHMEM + bool "Use full shmem filesystem" if EMBEDDED + default y + depends on MMU + help + The shmem is an internal filesystem used to manage shared memory. + It is backed by swap and manages resource limits. It is also exported + to userspace as tmpfs if TMPFS is enabled. Disabling this + option replaces shmem and tmpfs with the much simpler ramfs code, + which may be appropriate on small systems without swap. + +config AIO + bool "Enable AIO support" if EMBEDDED + default y + help + This option enables POSIX asynchronous I/O which may by used + by some high performance threaded applications. Disabling + this option saves about 7k. + +config VM_EVENT_COUNTERS + default y + bool "Enable VM event counters for /proc/vmstat" if EMBEDDED + help + VM event counters are needed for event counts to be shown. + This option allows the disabling of the VM event counters + on EMBEDDED systems. /proc/vmstat will only show page counts + if VM event counters are disabled. + +config PCI_QUIRKS + default y + bool "Enable PCI quirk workarounds" if EMBEDDED + depends on PCI + help + This enables workarounds for various PCI chipset + bugs/quirks. Disable this only if your target machine is + unaffected by PCI quirks. + +config SLUB_DEBUG + default y + bool "Enable SLUB debugging support" if EMBEDDED + depends on SLUB && SYSFS + help + SLUB has extensive debug support features. Disabling these can + result in significant savings in code size. This also disables + SLUB sysfs support. /sys/slab will not exist and there will be + no support for cache validation etc. + +config COMPAT_BRK + bool "Disable heap randomization" + default y + help + Randomizing heap placement makes heap exploits harder, but it + also breaks ancient binaries (including anything libc5 based). + This option changes the bootup default to heap randomization + disabled, and can be overriden runtime by setting + /proc/sys/kernel/randomize_va_space to 2. + + On non-ancient distros (post-2000 ones) N is usually a safe choice. + +choice + prompt "Choose SLAB allocator" + default SLUB + help + This option allows to select a slab allocator. + +config SLAB + bool "SLAB" + help + The regular slab allocator that is established and known to work + well in all environments. It organizes cache hot objects in + per cpu and per node queues. + +config SLUB + bool "SLUB (Unqueued Allocator)" + help + SLUB is a slab allocator that minimizes cache line usage + instead of managing queues of cached objects (SLAB approach). + Per cpu caching is realized using slabs of objects instead + of queues of objects. SLUB can use memory efficiently + and has enhanced diagnostics. SLUB is the default choice for + a slab allocator. + +config SLOB + depends on EMBEDDED + bool "SLOB (Simple Allocator)" + help + SLOB replaces the stock allocator with a drastically simpler + allocator. SLOB is generally more space efficient but + does not perform as well on large systems. + +endchoice + +config PROFILING + bool "Profiling support (EXPERIMENTAL)" + help + Say Y here to enable the extended profiling support mechanisms used + by profilers such as OProfile. + +# +# Place an empty function call at each tracepoint site. Can be +# dynamically changed for a probe function. +# +config TRACEPOINTS + bool + +config MARKERS + bool "Activate markers" + help + Place an empty function call at each marker site. Can be + dynamically changed for a probe function. + +source "arch/Kconfig" + +endmenu # General setup + +config HAVE_GENERIC_DMA_COHERENT + bool + default n + +config SLABINFO + bool + depends on PROC_FS + depends on SLAB || SLUB_DEBUG + default y + +config RT_MUTEXES + boolean + select PLIST + +config TINY_SHMEM + default !SHMEM + bool + +config BASE_SMALL + int + default 0 if BASE_FULL + default 1 if !BASE_FULL + +menuconfig MODULES + bool "Enable loadable module support" + help + Kernel modules are small pieces of compiled code which can + be inserted in the running kernel, rather than being + permanently built into the kernel. You use the "modprobe" + tool to add (and sometimes remove) them. If you say Y here, + many parts of the kernel can be built as modules (by + answering M instead of Y where indicated): this is most + useful for infrequently used options which are not required + for booting. For more information, see the man pages for + modprobe, lsmod, modinfo, insmod and rmmod. + + If you say Y here, you will need to run "make + modules_install" to put the modules under /lib/modules/ + where modprobe can find them (you may need to be root to do + this). + + If unsure, say Y. + +if MODULES + +config MODULE_FORCE_LOAD + bool "Forced module loading" + default n + help + Allow loading of modules without version information (ie. modprobe + --force). Forced module loading sets the 'F' (forced) taint flag and + is usually a really bad idea. + +config MODULE_UNLOAD + bool "Module unloading" + help + Without this option you will not be able to unload any + modules (note that some modules may not be unloadable + anyway), which makes your kernel smaller, faster + and simpler. If unsure, say Y. + +config MODULE_FORCE_UNLOAD + bool "Forced module unloading" + depends on MODULE_UNLOAD && EXPERIMENTAL + help + This option allows you to force a module to unload, even if the + kernel believes it is unsafe: the kernel will remove the module + without waiting for anyone to stop using it (using the -f option to + rmmod). This is mainly for kernel developers and desperate users. + If unsure, say N. + +config MODVERSIONS + bool "Module versioning support" + help + Usually, you have to use modules compiled with your kernel. + Saying Y here makes it sometimes possible to use modules + compiled for different kernels, by adding enough information + to the modules to (hopefully) spot any changes which would + make them incompatible with the kernel you are running. If + unsure, say N. + +config MODULE_SRCVERSION_ALL + bool "Source checksum for all modules" + help + Modules which contain a MODULE_VERSION get an extra "srcversion" + field inserted into their modinfo section, which contains a + sum of the source files which made it. This helps maintainers + see exactly which source was used to build a module (since + others sometimes change the module source without updating + the version). With this option, such a "srcversion" field + will be created for all modules. If unsure, say N. + +config KMOD + def_bool y + help + This is being removed soon. These days, CONFIG_MODULES + implies CONFIG_KMOD, so use that instead. + +endif # MODULES + +config STOP_MACHINE + bool + default y + depends on (SMP && MODULE_UNLOAD) || HOTPLUG_CPU + help + Need stop_machine() primitive. + +source "block/Kconfig" + +config PREEMPT_NOTIFIERS + bool + +config CLASSIC_RCU + def_bool !PREEMPT_RCU + help + This option selects the classic RCU implementation that is + designed for best read-side performance on non-realtime + systems. Classic RCU is the default. Note that the + PREEMPT_RCU symbol is used to select/deselect this option. diff --git a/init/Makefile b/init/Makefile new file mode 100644 index 0000000..4a243df --- /dev/null +++ b/init/Makefile @@ -0,0 +1,36 @@ +# +# Makefile for the linux kernel. +# + +obj-y := main.o version.o mounts.o +ifneq ($(CONFIG_BLK_DEV_INITRD),y) +obj-y += noinitramfs.o +else +obj-$(CONFIG_BLK_DEV_INITRD) += initramfs.o +endif +obj-$(CONFIG_GENERIC_CALIBRATE_DELAY) += calibrate.o + +mounts-y := do_mounts.o +mounts-$(CONFIG_BLK_DEV_RAM) += do_mounts_rd.o +mounts-$(CONFIG_BLK_DEV_INITRD) += do_mounts_initrd.o +mounts-$(CONFIG_BLK_DEV_MD) += do_mounts_md.o + +# files to be removed upon make clean +clean-files := ../include/linux/compile.h + +# dependencies on generated files need to be listed explicitly + +$(obj)/version.o: include/linux/compile.h + +# compile.h changes depending on hostname, generation number, etc, +# so we regenerate it always. +# mkcompile_h will make sure to only update the +# actual file if its content has changed. + + chk_compile.h = : + quiet_chk_compile.h = echo ' CHK $@' +silent_chk_compile.h = : +include/linux/compile.h: FORCE + @$($(quiet)chk_compile.h) + $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)" diff --git a/init/calibrate.c b/init/calibrate.c new file mode 100644 index 0000000..a379c90 --- /dev/null +++ b/init/calibrate.c @@ -0,0 +1,176 @@ +/* calibrate.c: default delay calibration + * + * Excised from init/main.c + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include +#include +#include +#include +#include + +unsigned long lpj_fine; +unsigned long preset_lpj; +static int __init lpj_setup(char *str) +{ + preset_lpj = simple_strtoul(str,NULL,0); + return 1; +} + +__setup("lpj=", lpj_setup); + +#ifdef ARCH_HAS_READ_CURRENT_TIMER + +/* This routine uses the read_current_timer() routine and gets the + * loops per jiffy directly, instead of guessing it using delay(). + * Also, this code tries to handle non-maskable asynchronous events + * (like SMIs) + */ +#define DELAY_CALIBRATION_TICKS ((HZ < 100) ? 1 : (HZ/100)) +#define MAX_DIRECT_CALIBRATION_RETRIES 5 + +static unsigned long __cpuinit calibrate_delay_direct(void) +{ + unsigned long pre_start, start, post_start; + unsigned long pre_end, end, post_end; + unsigned long start_jiffies; + unsigned long timer_rate_min, timer_rate_max; + unsigned long good_timer_sum = 0; + unsigned long good_timer_count = 0; + int i; + + if (read_current_timer(&pre_start) < 0 ) + return 0; + + /* + * A simple loop like + * while ( jiffies < start_jiffies+1) + * start = read_current_timer(); + * will not do. As we don't really know whether jiffy switch + * happened first or timer_value was read first. And some asynchronous + * event can happen between these two events introducing errors in lpj. + * + * So, we do + * 1. pre_start <- When we are sure that jiffy switch hasn't happened + * 2. check jiffy switch + * 3. start <- timer value before or after jiffy switch + * 4. post_start <- When we are sure that jiffy switch has happened + * + * Note, we don't know anything about order of 2 and 3. + * Now, by looking at post_start and pre_start difference, we can + * check whether any asynchronous event happened or not + */ + + for (i = 0; i < MAX_DIRECT_CALIBRATION_RETRIES; i++) { + pre_start = 0; + read_current_timer(&start); + start_jiffies = jiffies; + while (jiffies <= (start_jiffies + 1)) { + pre_start = start; + read_current_timer(&start); + } + read_current_timer(&post_start); + + pre_end = 0; + end = post_start; + while (jiffies <= + (start_jiffies + 1 + DELAY_CALIBRATION_TICKS)) { + pre_end = end; + read_current_timer(&end); + } + read_current_timer(&post_end); + + timer_rate_max = (post_end - pre_start) / + DELAY_CALIBRATION_TICKS; + timer_rate_min = (pre_end - post_start) / + DELAY_CALIBRATION_TICKS; + + /* + * If the upper limit and lower limit of the timer_rate is + * >= 12.5% apart, redo calibration. + */ + if (pre_start != 0 && pre_end != 0 && + (timer_rate_max - timer_rate_min) < (timer_rate_max >> 3)) { + good_timer_count++; + good_timer_sum += timer_rate_max; + } + } + + if (good_timer_count) + return (good_timer_sum/good_timer_count); + + printk(KERN_WARNING "calibrate_delay_direct() failed to get a good " + "estimate for loops_per_jiffy.\nProbably due to long platform interrupts. Consider using \"lpj=\" boot option.\n"); + return 0; +} +#else +static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;} +#endif + +/* + * This is the number of bits of precision for the loops_per_jiffy. Each + * bit takes on average 1.5/HZ seconds. This (like the original) is a little + * better than 1% + * For the boot cpu we can skip the delay calibration and assign it a value + * calculated based on the timer frequency. + * For the rest of the CPUs we cannot assume that the timer frequency is same as + * the cpu frequency, hence do the calibration for those. + */ +#define LPS_PREC 8 + +void __cpuinit calibrate_delay(void) +{ + unsigned long ticks, loopbit; + int lps_precision = LPS_PREC; + + if (preset_lpj) { + loops_per_jiffy = preset_lpj; + printk(KERN_INFO + "Calibrating delay loop (skipped) preset value.. "); + } else if ((smp_processor_id() == 0) && lpj_fine) { + loops_per_jiffy = lpj_fine; + printk(KERN_INFO + "Calibrating delay loop (skipped), " + "value calculated using timer frequency.. "); + } else if ((loops_per_jiffy = calibrate_delay_direct()) != 0) { + printk(KERN_INFO + "Calibrating delay using timer specific routine.. "); + } else { + loops_per_jiffy = (1<<12); + + printk(KERN_INFO "Calibrating delay loop... "); + while ((loops_per_jiffy <<= 1) != 0) { + /* wait for "start of" clock tick */ + ticks = jiffies; + while (ticks == jiffies) + /* nothing */; + /* Go .. */ + ticks = jiffies; + __delay(loops_per_jiffy); + ticks = jiffies - ticks; + if (ticks) + break; + } + + /* + * Do a binary approximation to get loops_per_jiffy set to + * equal one clock (up to lps_precision bits) + */ + loops_per_jiffy >>= 1; + loopbit = loops_per_jiffy; + while (lps_precision-- && (loopbit >>= 1)) { + loops_per_jiffy |= loopbit; + ticks = jiffies; + while (ticks == jiffies) + /* nothing */; + ticks = jiffies; + __delay(loops_per_jiffy); + if (jiffies != ticks) /* longer than 1 tick */ + loops_per_jiffy &= ~loopbit; + } + } + printk(KERN_CONT "%lu.%02lu BogoMIPS (lpj=%lu)\n", + loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy); +} diff --git a/init/do_mounts.c b/init/do_mounts.c new file mode 100644 index 0000000..d055b19 --- /dev/null +++ b/init/do_mounts.c @@ -0,0 +1,412 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "do_mounts.h" + +int __initdata rd_doload; /* 1 = load RAM disk, 0 = don't load */ + +int root_mountflags = MS_RDONLY | MS_SILENT; +static char * __initdata root_device_name; +static char __initdata saved_root_name[64]; +static int __initdata root_wait; + +dev_t ROOT_DEV; + +static int __init load_ramdisk(char *str) +{ + rd_doload = simple_strtol(str,NULL,0) & 3; + return 1; +} +__setup("load_ramdisk=", load_ramdisk); + +static int __init readonly(char *str) +{ + if (*str) + return 0; + root_mountflags |= MS_RDONLY; + return 1; +} + +static int __init readwrite(char *str) +{ + if (*str) + return 0; + root_mountflags &= ~MS_RDONLY; + return 1; +} + +__setup("ro", readonly); +__setup("rw", readwrite); + +/* + * Convert a name into device number. We accept the following variants: + * + * 1) device number in hexadecimal represents itself + * 2) /dev/nfs represents Root_NFS (0xff) + * 3) /dev/ represents the device number of disk + * 4) /dev/ represents the device number + * of partition - device number of disk plus the partition number + * 5) /dev/p - same as the above, that form is + * used when disk name of partitioned disk ends on a digit. + * + * If name doesn't have fall into the categories above, we return (0,0). + * block_class is used to check if something is a disk name. If the disk + * name contains slashes, the device name has them replaced with + * bangs. + */ + +dev_t name_to_dev_t(char *name) +{ + char s[32]; + char *p; + dev_t res = 0; + int part; + + if (strncmp(name, "/dev/", 5) != 0) { + unsigned maj, min; + + if (sscanf(name, "%u:%u", &maj, &min) == 2) { + res = MKDEV(maj, min); + if (maj != MAJOR(res) || min != MINOR(res)) + goto fail; + } else { + res = new_decode_dev(simple_strtoul(name, &p, 16)); + if (*p) + goto fail; + } + goto done; + } + + name += 5; + res = Root_NFS; + if (strcmp(name, "nfs") == 0) + goto done; + res = Root_RAM0; + if (strcmp(name, "ram") == 0) + goto done; + + if (strlen(name) > 31) + goto fail; + strcpy(s, name); + for (p = s; *p; p++) + if (*p == '/') + *p = '!'; + res = blk_lookup_devt(s, 0); + if (res) + goto done; + + /* + * try non-existant, but valid partition, which may only exist + * after revalidating the disk, like partitioned md devices + */ + while (p > s && isdigit(p[-1])) + p--; + if (p == s || !*p || *p == '0') + goto fail; + + /* try disk name without */ + part = simple_strtoul(p, NULL, 10); + *p = '\0'; + res = blk_lookup_devt(s, part); + if (res) + goto done; + + /* try disk name without p */ + if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p') + goto fail; + p[-1] = '\0'; + res = blk_lookup_devt(s, part); + if (res) + goto done; + +fail: + return 0; +done: + return res; +} + +static int __init root_dev_setup(char *line) +{ + strlcpy(saved_root_name, line, sizeof(saved_root_name)); + return 1; +} + +__setup("root=", root_dev_setup); + +static int __init rootwait_setup(char *str) +{ + if (*str) + return 0; + root_wait = 1; + return 1; +} + +__setup("rootwait", rootwait_setup); + +static char * __initdata root_mount_data; +static int __init root_data_setup(char *str) +{ + root_mount_data = str; + return 1; +} + +static char * __initdata root_fs_names; +static int __init fs_names_setup(char *str) +{ + root_fs_names = str; + return 1; +} + +static unsigned int __initdata root_delay; +static int __init root_delay_setup(char *str) +{ + root_delay = simple_strtoul(str, NULL, 0); + return 1; +} + +__setup("rootflags=", root_data_setup); +__setup("rootfstype=", fs_names_setup); +__setup("rootdelay=", root_delay_setup); + +static void __init get_fs_names(char *page) +{ + char *s = page; + + if (root_fs_names) { + strcpy(page, root_fs_names); + while (*s++) { + if (s[-1] == ',') + s[-1] = '\0'; + } + } else { + int len = get_filesystem_list(page); + char *p, *next; + + page[len] = '\0'; + for (p = page-1; p; p = next) { + next = strchr(++p, '\n'); + if (*p++ != '\t') + continue; + while ((*s++ = *p++) != '\n') + ; + s[-1] = '\0'; + } + } + *s = '\0'; +} + +static int __init do_mount_root(char *name, char *fs, int flags, void *data) +{ + int err = sys_mount(name, "/root", fs, flags, data); + if (err) + return err; + + sys_chdir("/root"); + ROOT_DEV = current->fs->pwd.mnt->mnt_sb->s_dev; + printk("VFS: Mounted root (%s filesystem)%s.\n", + current->fs->pwd.mnt->mnt_sb->s_type->name, + current->fs->pwd.mnt->mnt_sb->s_flags & MS_RDONLY ? + " readonly" : ""); + return 0; +} + +void __init mount_block_root(char *name, int flags) +{ + char *fs_names = __getname(); + char *p; +#ifdef CONFIG_BLOCK + char b[BDEVNAME_SIZE]; +#else + const char *b = name; +#endif + + get_fs_names(fs_names); +retry: + for (p = fs_names; *p; p += strlen(p)+1) { + int err = do_mount_root(name, p, flags, root_mount_data); + switch (err) { + case 0: + goto out; + case -EACCES: + flags |= MS_RDONLY; + goto retry; + case -EINVAL: + continue; + } + /* + * Allow the user to distinguish between failed sys_open + * and bad superblock on root device. + * and give them a list of the available devices + */ +#ifdef CONFIG_BLOCK + __bdevname(ROOT_DEV, b); +#endif + printk("VFS: Cannot open root device \"%s\" or %s\n", + root_device_name, b); + printk("Please append a correct \"root=\" boot option; here are the available partitions:\n"); + + printk_all_partitions(); +#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT + printk("DEBUG_BLOCK_EXT_DEVT is enabled, you need to specify " + "explicit textual name for \"root=\" boot option.\n"); +#endif + panic("VFS: Unable to mount root fs on %s", b); + } + + printk("List of all partitions:\n"); + printk_all_partitions(); + printk("No filesystem could mount root, tried: "); + for (p = fs_names; *p; p += strlen(p)+1) + printk(" %s", p); + printk("\n"); +#ifdef CONFIG_BLOCK + __bdevname(ROOT_DEV, b); +#endif + panic("VFS: Unable to mount root fs on %s", b); +out: + putname(fs_names); +} + +#ifdef CONFIG_ROOT_NFS +static int __init mount_nfs_root(void) +{ + void *data = nfs_root_data(); + + create_dev("/dev/root", ROOT_DEV); + if (data && + do_mount_root("/dev/root", "nfs", root_mountflags, data) == 0) + return 1; + return 0; +} +#endif + +#if defined(CONFIG_BLK_DEV_RAM) || defined(CONFIG_BLK_DEV_FD) +void __init change_floppy(char *fmt, ...) +{ + struct termios termios; + char buf[80]; + char c; + int fd; + va_list args; + va_start(args, fmt); + vsprintf(buf, fmt, args); + va_end(args); + fd = sys_open("/dev/root", O_RDWR | O_NDELAY, 0); + if (fd >= 0) { + sys_ioctl(fd, FDEJECT, 0); + sys_close(fd); + } + printk(KERN_NOTICE "VFS: Insert %s and press ENTER\n", buf); + fd = sys_open("/dev/console", O_RDWR, 0); + if (fd >= 0) { + sys_ioctl(fd, TCGETS, (long)&termios); + termios.c_lflag &= ~ICANON; + sys_ioctl(fd, TCSETSF, (long)&termios); + sys_read(fd, &c, 1); + termios.c_lflag |= ICANON; + sys_ioctl(fd, TCSETSF, (long)&termios); + sys_close(fd); + } +} +#endif + +void __init mount_root(void) +{ +#ifdef CONFIG_ROOT_NFS + if (MAJOR(ROOT_DEV) == UNNAMED_MAJOR) { + if (mount_nfs_root()) + return; + + printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n"); + ROOT_DEV = Root_FD0; + } +#endif +#ifdef CONFIG_BLK_DEV_FD + if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) { + /* rd_doload is 2 for a dual initrd/ramload setup */ + if (rd_doload==2) { + if (rd_load_disk(1)) { + ROOT_DEV = Root_RAM1; + root_device_name = NULL; + } + } else + change_floppy("root floppy"); + } +#endif +#ifdef CONFIG_BLOCK + create_dev("/dev/root", ROOT_DEV); + mount_block_root("/dev/root", root_mountflags); +#endif +} + +/* + * Prepare the namespace - decide what/where to mount, load ramdisks, etc. + */ +void __init prepare_namespace(void) +{ + int is_floppy; + + if (root_delay) { + printk(KERN_INFO "Waiting %dsec before mounting root device...\n", + root_delay); + ssleep(root_delay); + } + + /* wait for the known devices to complete their probing */ + while (driver_probe_done() != 0) + msleep(100); + + md_run_setup(); + + if (saved_root_name[0]) { + root_device_name = saved_root_name; + if (!strncmp(root_device_name, "mtd", 3) || + !strncmp(root_device_name, "ubi", 3)) { + mount_block_root(root_device_name, root_mountflags); + goto out; + } + ROOT_DEV = name_to_dev_t(root_device_name); + if (strncmp(root_device_name, "/dev/", 5) == 0) + root_device_name += 5; + } + + if (initrd_load()) + goto out; + + /* wait for any asynchronous scanning to complete */ + if ((ROOT_DEV == 0) && root_wait) { + printk(KERN_INFO "Waiting for root device %s...\n", + saved_root_name); + while (driver_probe_done() != 0 || + (ROOT_DEV = name_to_dev_t(saved_root_name)) == 0) + msleep(100); + } + + is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR; + + if (is_floppy && rd_doload && rd_load_disk(0)) + ROOT_DEV = Root_RAM0; + + mount_root(); +out: + sys_mount(".", "/", NULL, MS_MOVE, NULL); + sys_chroot("."); +} + diff --git a/init/do_mounts.h b/init/do_mounts.h new file mode 100644 index 0000000..9aa968d --- /dev/null +++ b/init/do_mounts.h @@ -0,0 +1,75 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +void change_floppy(char *fmt, ...); +void mount_block_root(char *name, int flags); +void mount_root(void); +extern int root_mountflags; + +static inline int create_dev(char *name, dev_t dev) +{ + sys_unlink(name); + return sys_mknod(name, S_IFBLK|0600, new_encode_dev(dev)); +} + +#if BITS_PER_LONG == 32 +static inline u32 bstat(char *name) +{ + struct stat64 stat; + if (sys_stat64(name, &stat) != 0) + return 0; + if (!S_ISBLK(stat.st_mode)) + return 0; + if (stat.st_rdev != (u32)stat.st_rdev) + return 0; + return stat.st_rdev; +} +#else +static inline u32 bstat(char *name) +{ + struct stat stat; + if (sys_newstat(name, &stat) != 0) + return 0; + if (!S_ISBLK(stat.st_mode)) + return 0; + return stat.st_rdev; +} +#endif + +#ifdef CONFIG_BLK_DEV_RAM + +int __init rd_load_disk(int n); +int __init rd_load_image(char *from); + +#else + +static inline int rd_load_disk(int n) { return 0; } +static inline int rd_load_image(char *from) { return 0; } + +#endif + +#ifdef CONFIG_BLK_DEV_INITRD + +int __init initrd_load(void); + +#else + +static inline int initrd_load(void) { return 0; } + +#endif + +#ifdef CONFIG_BLK_DEV_MD + +void md_run_setup(void); + +#else + +static inline void md_run_setup(void) {} + +#endif diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c new file mode 100644 index 0000000..614241b --- /dev/null +++ b/init/do_mounts_initrd.c @@ -0,0 +1,129 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "do_mounts.h" + +unsigned long initrd_start, initrd_end; +int initrd_below_start_ok; +unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */ +static int __initdata old_fd, root_fd; +static int __initdata mount_initrd = 1; + +static int __init no_initrd(char *str) +{ + mount_initrd = 0; + return 1; +} + +__setup("noinitrd", no_initrd); + +static int __init do_linuxrc(void * shell) +{ + static char *argv[] = { "linuxrc", NULL, }; + extern char * envp_init[]; + + sys_close(old_fd);sys_close(root_fd); + sys_close(0);sys_close(1);sys_close(2); + sys_setsid(); + (void) sys_open("/dev/console",O_RDWR,0); + (void) sys_dup(0); + (void) sys_dup(0); + return kernel_execve(shell, argv, envp_init); +} + +static void __init handle_initrd(void) +{ + int error; + int pid; + + real_root_dev = new_encode_dev(ROOT_DEV); + create_dev("/dev/root.old", Root_RAM0); + /* mount initrd on rootfs' /root */ + mount_block_root("/dev/root.old", root_mountflags & ~MS_RDONLY); + sys_mkdir("/old", 0700); + root_fd = sys_open("/", 0, 0); + old_fd = sys_open("/old", 0, 0); + /* move initrd over / and chdir/chroot in initrd root */ + sys_chdir("/root"); + sys_mount(".", "/", NULL, MS_MOVE, NULL); + sys_chroot("."); + + /* + * In case that a resume from disk is carried out by linuxrc or one of + * its children, we need to tell the freezer not to wait for us. + */ + current->flags |= PF_FREEZER_SKIP; + + pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD); + if (pid > 0) + while (pid != sys_wait4(-1, NULL, 0, NULL)) + yield(); + + current->flags &= ~PF_FREEZER_SKIP; + + /* move initrd to rootfs' /old */ + sys_fchdir(old_fd); + sys_mount("/", ".", NULL, MS_MOVE, NULL); + /* switch root and cwd back to / of rootfs */ + sys_fchdir(root_fd); + sys_chroot("."); + sys_close(old_fd); + sys_close(root_fd); + + if (new_decode_dev(real_root_dev) == Root_RAM0) { + sys_chdir("/old"); + return; + } + + ROOT_DEV = new_decode_dev(real_root_dev); + mount_root(); + + printk(KERN_NOTICE "Trying to move old root to /initrd ... "); + error = sys_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL); + if (!error) + printk("okay\n"); + else { + int fd = sys_open("/dev/root.old", O_RDWR, 0); + if (error == -ENOENT) + printk("/initrd does not exist. Ignored.\n"); + else + printk("failed\n"); + printk(KERN_NOTICE "Unmounting old root\n"); + sys_umount("/old", MNT_DETACH); + printk(KERN_NOTICE "Trying to free ramdisk memory ... "); + if (fd < 0) { + error = fd; + } else { + error = sys_ioctl(fd, BLKFLSBUF, 0); + sys_close(fd); + } + printk(!error ? "okay\n" : "failed\n"); + } +} + +int __init initrd_load(void) +{ + if (mount_initrd) { + create_dev("/dev/ram", Root_RAM0); + /* + * Load the initrd data into /dev/ram0. Execute it as initrd + * unless /dev/ram0 is supposed to be our actual root device, + * in that case the ram disk is just set up here, and gets + * mounted in the normal path. + */ + if (rd_load_image("/initrd.image") && ROOT_DEV != Root_RAM0) { + sys_unlink("/initrd.image"); + handle_initrd(); + return 1; + } + } + sys_unlink("/initrd.image"); + return 0; +} diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c new file mode 100644 index 0000000..d6da5cd --- /dev/null +++ b/init/do_mounts_md.c @@ -0,0 +1,302 @@ +#include +#include + +#include "do_mounts.h" + +/* + * When md (and any require personalities) are compiled into the kernel + * (not a module), arrays can be assembles are boot time using with AUTODETECT + * where specially marked partitions are registered with md_autodetect_dev(), + * and with MD_BOOT where devices to be collected are given on the boot line + * with md=..... + * The code for that is here. + */ + +#ifdef CONFIG_MD_AUTODETECT +static int __initdata raid_noautodetect; +#else +static int __initdata raid_noautodetect=1; +#endif +static int __initdata raid_autopart; + +static struct { + int minor; + int partitioned; + int level; + int chunk; + char *device_names; +} md_setup_args[256] __initdata; + +static int md_setup_ents __initdata; + +/* + * Parse the command-line parameters given our kernel, but do not + * actually try to invoke the MD device now; that is handled by + * md_setup_drive after the low-level disk drivers have initialised. + * + * 27/11/1999: Fixed to work correctly with the 2.3 kernel (which + * assigns the task of parsing integer arguments to the + * invoked program now). Added ability to initialise all + * the MD devices (by specifying multiple "md=" lines) + * instead of just one. -- KTK + * 18May2000: Added support for persistent-superblock arrays: + * md=n,0,factor,fault,device-list uses RAID0 for device n + * md=n,-1,factor,fault,device-list uses LINEAR for device n + * md=n,device-list reads a RAID superblock from the devices + * elements in device-list are read by name_to_kdev_t so can be + * a hex number or something like /dev/hda1 /dev/sdb + * 2001-06-03: Dave Cinege + * Shifted name_to_kdev_t() and related operations to md_set_drive() + * for later execution. Rewrote section to make devfs compatible. + */ +static int __init md_setup(char *str) +{ + int minor, level, factor, fault, partitioned = 0; + char *pername = ""; + char *str1; + int ent; + + if (*str == 'd') { + partitioned = 1; + str++; + } + if (get_option(&str, &minor) != 2) { /* MD Number */ + printk(KERN_WARNING "md: Too few arguments supplied to md=.\n"); + return 0; + } + str1 = str; + for (ent=0 ; ent< md_setup_ents ; ent++) + if (md_setup_args[ent].minor == minor && + md_setup_args[ent].partitioned == partitioned) { + printk(KERN_WARNING "md: md=%s%d, Specified more than once. " + "Replacing previous definition.\n", partitioned?"d":"", minor); + break; + } + if (ent >= ARRAY_SIZE(md_setup_args)) { + printk(KERN_WARNING "md: md=%s%d - too many md initialisations\n", partitioned?"d":"", minor); + return 0; + } + if (ent >= md_setup_ents) + md_setup_ents++; + switch (get_option(&str, &level)) { /* RAID level */ + case 2: /* could be 0 or -1.. */ + if (level == 0 || level == LEVEL_LINEAR) { + if (get_option(&str, &factor) != 2 || /* Chunk Size */ + get_option(&str, &fault) != 2) { + printk(KERN_WARNING "md: Too few arguments supplied to md=.\n"); + return 0; + } + md_setup_args[ent].level = level; + md_setup_args[ent].chunk = 1 << (factor+12); + if (level == LEVEL_LINEAR) + pername = "linear"; + else + pername = "raid0"; + break; + } + /* FALL THROUGH */ + case 1: /* the first device is numeric */ + str = str1; + /* FALL THROUGH */ + case 0: + md_setup_args[ent].level = LEVEL_NONE; + pername="super-block"; + } + + printk(KERN_INFO "md: Will configure md%d (%s) from %s, below.\n", + minor, pername, str); + md_setup_args[ent].device_names = str; + md_setup_args[ent].partitioned = partitioned; + md_setup_args[ent].minor = minor; + + return 1; +} + +#define MdpMinorShift 6 + +static void __init md_setup_drive(void) +{ + int minor, i, ent, partitioned; + dev_t dev; + dev_t devices[MD_SB_DISKS+1]; + + for (ent = 0; ent < md_setup_ents ; ent++) { + int fd; + int err = 0; + char *devname; + mdu_disk_info_t dinfo; + char name[16]; + + minor = md_setup_args[ent].minor; + partitioned = md_setup_args[ent].partitioned; + devname = md_setup_args[ent].device_names; + + sprintf(name, "/dev/md%s%d", partitioned?"_d":"", minor); + if (partitioned) + dev = MKDEV(mdp_major, minor << MdpMinorShift); + else + dev = MKDEV(MD_MAJOR, minor); + create_dev(name, dev); + for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) { + char *p; + char comp_name[64]; + u32 rdev; + + p = strchr(devname, ','); + if (p) + *p++ = 0; + + dev = name_to_dev_t(devname); + if (strncmp(devname, "/dev/", 5) == 0) + devname += 5; + snprintf(comp_name, 63, "/dev/%s", devname); + rdev = bstat(comp_name); + if (rdev) + dev = new_decode_dev(rdev); + if (!dev) { + printk(KERN_WARNING "md: Unknown device name: %s\n", devname); + break; + } + + devices[i] = dev; + + devname = p; + } + devices[i] = 0; + + if (!i) + continue; + + printk(KERN_INFO "md: Loading md%s%d: %s\n", + partitioned ? "_d" : "", minor, + md_setup_args[ent].device_names); + + fd = sys_open(name, 0, 0); + if (fd < 0) { + printk(KERN_ERR "md: open failed - cannot start " + "array %s\n", name); + continue; + } + if (sys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) { + printk(KERN_WARNING + "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n", + minor); + sys_close(fd); + continue; + } + + if (md_setup_args[ent].level != LEVEL_NONE) { + /* non-persistent */ + mdu_array_info_t ainfo; + ainfo.level = md_setup_args[ent].level; + ainfo.size = 0; + ainfo.nr_disks =0; + ainfo.raid_disks =0; + while (devices[ainfo.raid_disks]) + ainfo.raid_disks++; + ainfo.md_minor =minor; + ainfo.not_persistent = 1; + + ainfo.state = (1 << MD_SB_CLEAN); + ainfo.layout = 0; + ainfo.chunk_size = md_setup_args[ent].chunk; + err = sys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo); + for (i = 0; !err && i <= MD_SB_DISKS; i++) { + dev = devices[i]; + if (!dev) + break; + dinfo.number = i; + dinfo.raid_disk = i; + dinfo.state = (1<= 0) { + sys_ioctl(fd, RAID_AUTORUN, raid_autopart); + sys_close(fd); + } +} + +void __init md_run_setup(void) +{ + create_dev("/dev/md0", MKDEV(MD_MAJOR, 0)); + + if (raid_noautodetect) + printk(KERN_INFO "md: Skipping autodetection of RAID arrays. (raid=autodetect will force)\n"); + else + autodetect_raid(); + md_setup_drive(); +} diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c new file mode 100644 index 0000000..d48990e --- /dev/null +++ b/init/do_mounts_rd.c @@ -0,0 +1,358 @@ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "do_mounts.h" +#include "../fs/squashfs/squashfs_fs.h" + +#include + +#include +#include +#include + +int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */ + +static int __init prompt_ramdisk(char *str) +{ + rd_prompt = simple_strtol(str,NULL,0) & 1; + return 1; +} +__setup("prompt_ramdisk=", prompt_ramdisk); + +int __initdata rd_image_start; /* starting block # of image */ + +static int __init ramdisk_start_setup(char *str) +{ + rd_image_start = simple_strtol(str,NULL,0); + return 1; +} +__setup("ramdisk_start=", ramdisk_start_setup); + +static int __init crd_load(int in_fd, int out_fd, decompress_fn deco); + +/* + * This routine tries to find a RAM disk image to load, and returns the + * number of blocks to read for a non-compressed image, 0 if the image + * is a compressed image, and -1 if an image with the right magic + * numbers could not be found. + * + * We currently check for the following magic numbers: + * minix + * ext2 + * romfs + * cramfs + * squashfs + * gzip + */ +static int __init +identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) +{ + const int size = 512; + struct minix_super_block *minixsb; + struct ext2_super_block *ext2sb; + struct romfs_super_block *romfsb; + struct cramfs_super *cramfsb; + struct squashfs_super_block *squashfsb; + int nblocks = -1; + unsigned char *buf; + + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -1; + + minixsb = (struct minix_super_block *) buf; + ext2sb = (struct ext2_super_block *) buf; + romfsb = (struct romfs_super_block *) buf; + cramfsb = (struct cramfs_super *) buf; + squashfsb = (struct squashfs_super_block *) buf; + memset(buf, 0xe5, size); + + /* + * Read block 0 to test for gzipped kernel + */ + sys_lseek(fd, start_block * BLOCK_SIZE, 0); + sys_read(fd, buf, size); + +#ifdef CONFIG_RD_GZIP + /* + * If it matches the gzip magic numbers, return 0 + */ + if (buf[0] == 037 && ((buf[1] == 0213) || (buf[1] == 0236))) { + printk(KERN_NOTICE + "RAMDISK: Compressed image found at block %d\n", + start_block); + *decompressor = gunzip; + nblocks = 0; + goto done; + } +#endif + +#ifdef CONFIG_RD_BZIP2 + /* + * If it matches the bzip2 magic numbers, return -1 + */ + if (buf[0] == 0x42 && (buf[1] == 0x5a)) { + printk(KERN_NOTICE + "RAMDISK: Bzipped image found at block %d\n", + start_block); + *decompressor = bunzip2; + nblocks = 0; + goto done; + } +#endif + +#ifdef CONFIG_RD_LZMA + /* + * If it matches the lzma magic numbers, return -1 + */ + if (buf[0] == 0x5d && (buf[1] == 0x00)) { + printk(KERN_NOTICE + "RAMDISK: Lzma image found at block %d\n", + start_block); + *decompressor = unlzma; + nblocks = 0; + goto done; + } +#endif + + /* romfs is at block zero too */ + if (romfsb->word0 == ROMSB_WORD0 && + romfsb->word1 == ROMSB_WORD1) { + printk(KERN_NOTICE + "RAMDISK: romfs filesystem found at block %d\n", + start_block); + nblocks = (ntohl(romfsb->size)+BLOCK_SIZE-1)>>BLOCK_SIZE_BITS; + goto done; + } + + if (cramfsb->magic == CRAMFS_MAGIC) { + printk(KERN_NOTICE + "RAMDISK: cramfs filesystem found at block %d\n", + start_block); + nblocks = (cramfsb->size + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS; + goto done; + } + + /* squashfs is at block zero too */ + if (le32_to_cpu(squashfsb->s_magic) == SQUASHFS_MAGIC) { + printk(KERN_NOTICE + "RAMDISK: squashfs filesystem found at block %d\n", + start_block); + nblocks = (le64_to_cpu(squashfsb->bytes_used) + BLOCK_SIZE - 1) + >> BLOCK_SIZE_BITS; + goto done; + } + + /* + * Read block 1 to test for minix and ext2 superblock + */ + sys_lseek(fd, (start_block+1) * BLOCK_SIZE, 0); + sys_read(fd, buf, size); + + /* Try minix */ + if (minixsb->s_magic == MINIX_SUPER_MAGIC || + minixsb->s_magic == MINIX_SUPER_MAGIC2) { + printk(KERN_NOTICE + "RAMDISK: Minix filesystem found at block %d\n", + start_block); + nblocks = minixsb->s_nzones << minixsb->s_log_zone_size; + goto done; + } + + /* Try ext2 */ + if (ext2sb->s_magic == cpu_to_le16(EXT2_SUPER_MAGIC)) { + printk(KERN_NOTICE + "RAMDISK: ext2 filesystem found at block %d\n", + start_block); + nblocks = le32_to_cpu(ext2sb->s_blocks_count) << + le32_to_cpu(ext2sb->s_log_block_size); + goto done; + } + + printk(KERN_NOTICE + "RAMDISK: Couldn't find valid RAM disk image starting at %d.\n", + start_block); + +done: + sys_lseek(fd, start_block * BLOCK_SIZE, 0); + kfree(buf); + return nblocks; +} + +int __init rd_load_image(char *from) +{ + int res = 0; + int in_fd, out_fd; + unsigned long rd_blocks, devblocks; + int nblocks, i, disk; + char *buf = NULL; + unsigned short rotate = 0; + decompress_fn decompressor = NULL; +#if !defined(CONFIG_S390) && !defined(CONFIG_PPC_ISERIES) + char rotator[4] = { '|' , '/' , '-' , '\\' }; +#endif + + out_fd = sys_open("/dev/ram", O_RDWR, 0); + if (out_fd < 0) + goto out; + + in_fd = sys_open(from, O_RDONLY, 0); + if (in_fd < 0) + goto noclose_input; + + nblocks = identify_ramdisk_image(in_fd, rd_image_start, &decompressor); + if (nblocks < 0) + goto done; + + if (nblocks == 0) { + if (crd_load(in_fd, out_fd, decompressor) == 0) + goto successful_load; + goto done; + } + + /* + * NOTE NOTE: nblocks is not actually blocks but + * the number of kibibytes of data to load into a ramdisk. + * So any ramdisk block size that is a multiple of 1KiB should + * work when the appropriate ramdisk_blocksize is specified + * on the command line. + * + * The default ramdisk_blocksize is 1KiB and it is generally + * silly to use anything else, so make sure to use 1KiB + * blocksize while generating ext2fs ramdisk-images. + */ + if (sys_ioctl(out_fd, BLKGETSIZE, (unsigned long)&rd_blocks) < 0) + rd_blocks = 0; + else + rd_blocks >>= 1; + + if (nblocks > rd_blocks) { + printk("RAMDISK: image too big! (%dKiB/%ldKiB)\n", + nblocks, rd_blocks); + goto done; + } + + /* + * OK, time to copy in the data + */ + if (sys_ioctl(in_fd, BLKGETSIZE, (unsigned long)&devblocks) < 0) + devblocks = 0; + else + devblocks >>= 1; + + if (strcmp(from, "/initrd.image") == 0) + devblocks = nblocks; + + if (devblocks == 0) { + printk(KERN_ERR "RAMDISK: could not determine device size\n"); + goto done; + } + + buf = kmalloc(BLOCK_SIZE, GFP_KERNEL); + if (!buf) { + printk(KERN_ERR "RAMDISK: could not allocate buffer\n"); + goto done; + } + + printk(KERN_NOTICE "RAMDISK: Loading %dKiB [%ld disk%s] into ram disk... ", + nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? "s" : ""); + for (i = 0, disk = 1; i < nblocks; i++) { + if (i && (i % devblocks == 0)) { + printk("done disk #%d.\n", disk++); + rotate = 0; + if (sys_close(in_fd)) { + printk("Error closing the disk.\n"); + goto noclose_input; + } + change_floppy("disk #%d", disk); + in_fd = sys_open(from, O_RDONLY, 0); + if (in_fd < 0) { + printk("Error opening disk.\n"); + goto noclose_input; + } + printk("Loading disk #%d... ", disk); + } + sys_read(in_fd, buf, BLOCK_SIZE); + sys_write(out_fd, buf, BLOCK_SIZE); +#if !defined(CONFIG_S390) && !defined(CONFIG_PPC_ISERIES) + if (!(i % 16)) { + printk("%c\b", rotator[rotate & 0x3]); + rotate++; + } +#endif + } + printk("done.\n"); + +successful_load: + res = 1; +done: + sys_close(in_fd); +noclose_input: + sys_close(out_fd); +out: + kfree(buf); + sys_unlink("/dev/ram"); + return res; +} + +int __init rd_load_disk(int n) +{ + if (rd_prompt) + change_floppy("root floppy disk to be loaded into RAM disk"); + create_dev("/dev/root", ROOT_DEV); + create_dev("/dev/ram", MKDEV(RAMDISK_MAJOR, n)); + return rd_load_image("/dev/root"); +} + +static int exit_code; +static int decompress_error; +static int crd_infd, crd_outfd; + +static int __init compr_fill(void *buf, unsigned int len) +{ + int r = sys_read(crd_infd, buf, len); + if (r < 0) + printk(KERN_ERR "RAMDISK: error while reading compressed data"); + else if (r == 0) + printk(KERN_ERR "RAMDISK: EOF while reading compressed data"); + return r; +} + +static int __init compr_flush(void *window, unsigned int outcnt) +{ + int written = sys_write(crd_outfd, window, outcnt); + if (written != outcnt) { + if (decompress_error == 0) + printk(KERN_ERR + "RAMDISK: incomplete write (%d != %d)\n", + written, outcnt); + decompress_error = 1; + return -1; + } + return outcnt; +} + +static void __init error(char *x) +{ + printk(KERN_ERR "%s\n", x); + exit_code = 1; + decompress_error = 1; +} + +static int __init crd_load(int in_fd, int out_fd, decompress_fn deco) +{ + int result; + crd_infd = in_fd; + crd_outfd = out_fd; + result = deco(NULL, 0, compr_fill, compr_flush, NULL, NULL, error); + if (decompress_error) + result = 1; + return result; +} diff --git a/init/do_mounts_rd.c.orig b/init/do_mounts_rd.c.orig new file mode 100644 index 0000000..dcaeb1f --- /dev/null +++ b/init/do_mounts_rd.c.orig @@ -0,0 +1,344 @@ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "do_mounts.h" + +#include + +#include +#include +#include + +int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */ + +static int __init prompt_ramdisk(char *str) +{ + rd_prompt = simple_strtol(str,NULL,0) & 1; + return 1; +} +__setup("prompt_ramdisk=", prompt_ramdisk); + +int __initdata rd_image_start; /* starting block # of image */ + +static int __init ramdisk_start_setup(char *str) +{ + rd_image_start = simple_strtol(str,NULL,0); + return 1; +} +__setup("ramdisk_start=", ramdisk_start_setup); + +static int __init crd_load(int in_fd, int out_fd, decompress_fn deco); + +/* + * This routine tries to find a RAM disk image to load, and returns the + * number of blocks to read for a non-compressed image, 0 if the image + * is a compressed image, and -1 if an image with the right magic + * numbers could not be found. + * + * We currently check for the following magic numbers: + * minix + * ext2 + * romfs + * cramfs + * gzip + */ +static int __init +identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) +{ + const int size = 512; + struct minix_super_block *minixsb; + struct ext2_super_block *ext2sb; + struct romfs_super_block *romfsb; + struct cramfs_super *cramfsb; + int nblocks = -1; + unsigned char *buf; + + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -1; + + minixsb = (struct minix_super_block *) buf; + ext2sb = (struct ext2_super_block *) buf; + romfsb = (struct romfs_super_block *) buf; + cramfsb = (struct cramfs_super *) buf; + memset(buf, 0xe5, size); + + /* + * Read block 0 to test for gzipped kernel + */ + sys_lseek(fd, start_block * BLOCK_SIZE, 0); + sys_read(fd, buf, size); + +#ifdef CONFIG_RD_GZIP + /* + * If it matches the gzip magic numbers, return 0 + */ + if (buf[0] == 037 && ((buf[1] == 0213) || (buf[1] == 0236))) { + printk(KERN_NOTICE + "RAMDISK: Compressed image found at block %d\n", + start_block); + *decompressor = gunzip; + nblocks = 0; + goto done; + } +#endif + +#ifdef CONFIG_RD_BZIP2 + /* + * If it matches the bzip2 magic numbers, return -1 + */ + if (buf[0] == 0x42 && (buf[1] == 0x5a)) { + printk(KERN_NOTICE + "RAMDISK: Bzipped image found at block %d\n", + start_block); + *decompressor = bunzip2; + nblocks = 0; + goto done; + } +#endif + +#ifdef CONFIG_RD_LZMA + /* + * If it matches the lzma magic numbers, return -1 + */ + if (buf[0] == 0x5d && (buf[1] == 0x00)) { + printk(KERN_NOTICE + "RAMDISK: Lzma image found at block %d\n", + start_block); + *decompressor = unlzma; + nblocks = 0; + goto done; + } +#endif + + /* romfs is at block zero too */ + if (romfsb->word0 == ROMSB_WORD0 && + romfsb->word1 == ROMSB_WORD1) { + printk(KERN_NOTICE + "RAMDISK: romfs filesystem found at block %d\n", + start_block); + nblocks = (ntohl(romfsb->size)+BLOCK_SIZE-1)>>BLOCK_SIZE_BITS; + goto done; + } + + if (cramfsb->magic == CRAMFS_MAGIC) { + printk(KERN_NOTICE + "RAMDISK: cramfs filesystem found at block %d\n", + start_block); + nblocks = (cramfsb->size + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS; + goto done; + } + + /* + * Read block 1 to test for minix and ext2 superblock + */ + sys_lseek(fd, (start_block+1) * BLOCK_SIZE, 0); + sys_read(fd, buf, size); + + /* Try minix */ + if (minixsb->s_magic == MINIX_SUPER_MAGIC || + minixsb->s_magic == MINIX_SUPER_MAGIC2) { + printk(KERN_NOTICE + "RAMDISK: Minix filesystem found at block %d\n", + start_block); + nblocks = minixsb->s_nzones << minixsb->s_log_zone_size; + goto done; + } + + /* Try ext2 */ + if (ext2sb->s_magic == cpu_to_le16(EXT2_SUPER_MAGIC)) { + printk(KERN_NOTICE + "RAMDISK: ext2 filesystem found at block %d\n", + start_block); + nblocks = le32_to_cpu(ext2sb->s_blocks_count) << + le32_to_cpu(ext2sb->s_log_block_size); + goto done; + } + + printk(KERN_NOTICE + "RAMDISK: Couldn't find valid RAM disk image starting at %d.\n", + start_block); + +done: + sys_lseek(fd, start_block * BLOCK_SIZE, 0); + kfree(buf); + return nblocks; +} + +int __init rd_load_image(char *from) +{ + int res = 0; + int in_fd, out_fd; + unsigned long rd_blocks, devblocks; + int nblocks, i, disk; + char *buf = NULL; + unsigned short rotate = 0; + decompress_fn decompressor = NULL; +#if !defined(CONFIG_S390) && !defined(CONFIG_PPC_ISERIES) + char rotator[4] = { '|' , '/' , '-' , '\\' }; +#endif + + out_fd = sys_open("/dev/ram", O_RDWR, 0); + if (out_fd < 0) + goto out; + + in_fd = sys_open(from, O_RDONLY, 0); + if (in_fd < 0) + goto noclose_input; + + nblocks = identify_ramdisk_image(in_fd, rd_image_start, &decompressor); + if (nblocks < 0) + goto done; + + if (nblocks == 0) { + if (crd_load(in_fd, out_fd, decompressor) == 0) + goto successful_load; + goto done; + } + + /* + * NOTE NOTE: nblocks is not actually blocks but + * the number of kibibytes of data to load into a ramdisk. + * So any ramdisk block size that is a multiple of 1KiB should + * work when the appropriate ramdisk_blocksize is specified + * on the command line. + * + * The default ramdisk_blocksize is 1KiB and it is generally + * silly to use anything else, so make sure to use 1KiB + * blocksize while generating ext2fs ramdisk-images. + */ + if (sys_ioctl(out_fd, BLKGETSIZE, (unsigned long)&rd_blocks) < 0) + rd_blocks = 0; + else + rd_blocks >>= 1; + + if (nblocks > rd_blocks) { + printk("RAMDISK: image too big! (%dKiB/%ldKiB)\n", + nblocks, rd_blocks); + goto done; + } + + /* + * OK, time to copy in the data + */ + if (sys_ioctl(in_fd, BLKGETSIZE, (unsigned long)&devblocks) < 0) + devblocks = 0; + else + devblocks >>= 1; + + if (strcmp(from, "/initrd.image") == 0) + devblocks = nblocks; + + if (devblocks == 0) { + printk(KERN_ERR "RAMDISK: could not determine device size\n"); + goto done; + } + + buf = kmalloc(BLOCK_SIZE, GFP_KERNEL); + if (!buf) { + printk(KERN_ERR "RAMDISK: could not allocate buffer\n"); + goto done; + } + + printk(KERN_NOTICE "RAMDISK: Loading %dKiB [%ld disk%s] into ram disk... ", + nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? "s" : ""); + for (i = 0, disk = 1; i < nblocks; i++) { + if (i && (i % devblocks == 0)) { + printk("done disk #%d.\n", disk++); + rotate = 0; + if (sys_close(in_fd)) { + printk("Error closing the disk.\n"); + goto noclose_input; + } + change_floppy("disk #%d", disk); + in_fd = sys_open(from, O_RDONLY, 0); + if (in_fd < 0) { + printk("Error opening disk.\n"); + goto noclose_input; + } + printk("Loading disk #%d... ", disk); + } + sys_read(in_fd, buf, BLOCK_SIZE); + sys_write(out_fd, buf, BLOCK_SIZE); +#if !defined(CONFIG_S390) && !defined(CONFIG_PPC_ISERIES) + if (!(i % 16)) { + printk("%c\b", rotator[rotate & 0x3]); + rotate++; + } +#endif + } + printk("done.\n"); + +successful_load: + res = 1; +done: + sys_close(in_fd); +noclose_input: + sys_close(out_fd); +out: + kfree(buf); + sys_unlink("/dev/ram"); + return res; +} + +int __init rd_load_disk(int n) +{ + if (rd_prompt) + change_floppy("root floppy disk to be loaded into RAM disk"); + create_dev("/dev/root", ROOT_DEV); + create_dev("/dev/ram", MKDEV(RAMDISK_MAJOR, n)); + return rd_load_image("/dev/root"); +} + +static int exit_code; +static int decompress_error; +static int crd_infd, crd_outfd; + +static int __init compr_fill(void *buf, unsigned int len) +{ + int r = sys_read(crd_infd, buf, len); + if (r < 0) + printk(KERN_ERR "RAMDISK: error while reading compressed data"); + else if (r == 0) + printk(KERN_ERR "RAMDISK: EOF while reading compressed data"); + return r; +} + +static int __init compr_flush(void *window, unsigned int outcnt) +{ + int written = sys_write(crd_outfd, window, outcnt); + if (written != outcnt) { + if (decompress_error == 0) + printk(KERN_ERR + "RAMDISK: incomplete write (%d != %d)\n", + written, outcnt); + decompress_error = 1; + return -1; + } + return outcnt; +} + +static void __init error(char *x) +{ + printk(KERN_ERR "%s\n", x); + exit_code = 1; + decompress_error = 1; +} + +static int __init crd_load(int in_fd, int out_fd, decompress_fn deco) +{ + int result; + crd_infd = in_fd; + crd_outfd = out_fd; + result = deco(NULL, 0, compr_fill, compr_flush, NULL, NULL, error); + if (decompress_error) + result = 1; + return result; +} diff --git a/init/initramfs.c b/init/initramfs.c new file mode 100644 index 0000000..40bd4fb --- /dev/null +++ b/init/initramfs.c @@ -0,0 +1,576 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static __initdata char *message; +static void __init error(char *x) +{ + if (!message) + message = x; +} + +/* link hash */ + +#define N_ALIGN(len) ((((len) + 1) & ~3) + 2) + +static __initdata struct hash { + int ino, minor, major; + mode_t mode; + struct hash *next; + char name[N_ALIGN(PATH_MAX)]; +} *head[32]; + +static inline int hash(int major, int minor, int ino) +{ + unsigned long tmp = ino + minor + (major << 3); + tmp += tmp >> 5; + return tmp & 31; +} + +static char __init *find_link(int major, int minor, int ino, + mode_t mode, char *name) +{ + struct hash **p, *q; + for (p = head + hash(major, minor, ino); *p; p = &(*p)->next) { + if ((*p)->ino != ino) + continue; + if ((*p)->minor != minor) + continue; + if ((*p)->major != major) + continue; + if (((*p)->mode ^ mode) & S_IFMT) + continue; + return (*p)->name; + } + q = kmalloc(sizeof(struct hash), GFP_KERNEL); + if (!q) + panic("can't allocate link hash entry"); + q->major = major; + q->minor = minor; + q->ino = ino; + q->mode = mode; + strcpy(q->name, name); + q->next = NULL; + *p = q; + return NULL; +} + +static void __init free_hash(void) +{ + struct hash **p, *q; + for (p = head; p < head + 32; p++) { + while (*p) { + q = *p; + *p = q->next; + kfree(q); + } + } +} + +static long __init do_utime(char __user *filename, time_t mtime) +{ + struct timespec t[2]; + + t[0].tv_sec = mtime; + t[0].tv_nsec = 0; + t[1].tv_sec = mtime; + t[1].tv_nsec = 0; + + return do_utimes(AT_FDCWD, filename, t, AT_SYMLINK_NOFOLLOW); +} + +static __initdata LIST_HEAD(dir_list); +struct dir_entry { + struct list_head list; + char *name; + time_t mtime; +}; + +static void __init dir_add(const char *name, time_t mtime) +{ + struct dir_entry *de = kmalloc(sizeof(struct dir_entry), GFP_KERNEL); + if (!de) + panic("can't allocate dir_entry buffer"); + INIT_LIST_HEAD(&de->list); + de->name = kstrdup(name, GFP_KERNEL); + de->mtime = mtime; + list_add(&de->list, &dir_list); +} + +static void __init dir_utime(void) +{ + struct dir_entry *de, *tmp; + list_for_each_entry_safe(de, tmp, &dir_list, list) { + list_del(&de->list); + do_utime(de->name, de->mtime); + kfree(de->name); + kfree(de); + } +} + +static __initdata time_t mtime; + +/* cpio header parsing */ + +static __initdata unsigned long ino, major, minor, nlink; +static __initdata mode_t mode; +static __initdata unsigned long body_len, name_len; +static __initdata uid_t uid; +static __initdata gid_t gid; +static __initdata unsigned rdev; + +static void __init parse_header(char *s) +{ + unsigned long parsed[12]; + char buf[9]; + int i; + + buf[8] = '\0'; + for (i = 0, s += 6; i < 12; i++, s += 8) { + memcpy(buf, s, 8); + parsed[i] = simple_strtoul(buf, NULL, 16); + } + ino = parsed[0]; + mode = parsed[1]; + uid = parsed[2]; + gid = parsed[3]; + nlink = parsed[4]; + mtime = parsed[5]; + body_len = parsed[6]; + major = parsed[7]; + minor = parsed[8]; + rdev = new_encode_dev(MKDEV(parsed[9], parsed[10])); + name_len = parsed[11]; +} + +/* FSM */ + +static __initdata enum state { + Start, + Collect, + GotHeader, + SkipIt, + GotName, + CopyFile, + GotSymlink, + Reset +} state, next_state; + +static __initdata char *victim; +static __initdata unsigned count; +static __initdata loff_t this_header, next_header; + +static __initdata int dry_run; + +static inline void __init eat(unsigned n) +{ + victim += n; + this_header += n; + count -= n; +} + +static __initdata char *vcollected; +static __initdata char *collected; +static __initdata int remains; +static __initdata char *collect; + +static void __init read_into(char *buf, unsigned size, enum state next) +{ + if (count >= size) { + collected = victim; + eat(size); + state = next; + } else { + collect = collected = buf; + remains = size; + next_state = next; + state = Collect; + } +} + +static __initdata char *header_buf, *symlink_buf, *name_buf; + +static int __init do_start(void) +{ + read_into(header_buf, 110, GotHeader); + return 0; +} + +static int __init do_collect(void) +{ + unsigned n = remains; + if (count < n) + n = count; + memcpy(collect, victim, n); + eat(n); + collect += n; + if ((remains -= n) != 0) + return 1; + state = next_state; + return 0; +} + +static int __init do_header(void) +{ + if (memcmp(collected, "070707", 6)==0) { + error("incorrect cpio method used: use -H newc option"); + return 1; + } + if (memcmp(collected, "070701", 6)) { + error("no cpio magic"); + return 1; + } + parse_header(collected); + next_header = this_header + N_ALIGN(name_len) + body_len; + next_header = (next_header + 3) & ~3; + if (dry_run) { + read_into(name_buf, N_ALIGN(name_len), GotName); + return 0; + } + state = SkipIt; + if (name_len <= 0 || name_len > PATH_MAX) + return 0; + if (S_ISLNK(mode)) { + if (body_len > PATH_MAX) + return 0; + collect = collected = symlink_buf; + remains = N_ALIGN(name_len) + body_len; + next_state = GotSymlink; + state = Collect; + return 0; + } + if (S_ISREG(mode) || !body_len) + read_into(name_buf, N_ALIGN(name_len), GotName); + return 0; +} + +static int __init do_skip(void) +{ + if (this_header + count < next_header) { + eat(count); + return 1; + } else { + eat(next_header - this_header); + state = next_state; + return 0; + } +} + +static int __init do_reset(void) +{ + while(count && *victim == '\0') + eat(1); + if (count && (this_header & 3)) + error("broken padding"); + return 1; +} + +static int __init maybe_link(void) +{ + if (nlink >= 2) { + char *old = find_link(major, minor, ino, mode, collected); + if (old) + return (sys_link(old, collected) < 0) ? -1 : 1; + } + return 0; +} + +static void __init clean_path(char *path, mode_t mode) +{ + struct stat st; + + if (!sys_newlstat(path, &st) && (st.st_mode^mode) & S_IFMT) { + if (S_ISDIR(st.st_mode)) + sys_rmdir(path); + else + sys_unlink(path); + } +} + +static __initdata int wfd; + +static int __init do_name(void) +{ + state = SkipIt; + next_state = Reset; + if (strcmp(collected, "TRAILER!!!") == 0) { + free_hash(); + return 0; + } + if (dry_run) + return 0; + clean_path(collected, mode); + if (S_ISREG(mode)) { + int ml = maybe_link(); + if (ml >= 0) { + int openflags = O_WRONLY|O_CREAT; + if (ml != 1) + openflags |= O_TRUNC; + wfd = sys_open(collected, openflags, mode); + + if (wfd >= 0) { + sys_fchown(wfd, uid, gid); + sys_fchmod(wfd, mode); + vcollected = kstrdup(collected, GFP_KERNEL); + state = CopyFile; + } + } + } else if (S_ISDIR(mode)) { + sys_mkdir(collected, mode); + sys_chown(collected, uid, gid); + sys_chmod(collected, mode); + dir_add(collected, mtime); + } else if (S_ISBLK(mode) || S_ISCHR(mode) || + S_ISFIFO(mode) || S_ISSOCK(mode)) { + if (maybe_link() == 0) { + sys_mknod(collected, mode, rdev); + sys_chown(collected, uid, gid); + sys_chmod(collected, mode); + do_utime(collected, mtime); + } + } + return 0; +} + +static int __init do_copy(void) +{ + if (count >= body_len) { + sys_write(wfd, victim, body_len); + sys_close(wfd); + do_utime(vcollected, mtime); + kfree(vcollected); + eat(body_len); + state = SkipIt; + return 0; + } else { + sys_write(wfd, victim, count); + body_len -= count; + eat(count); + return 1; + } +} + +static int __init do_symlink(void) +{ + collected[N_ALIGN(name_len) + body_len] = '\0'; + clean_path(collected, 0); + sys_symlink(collected + N_ALIGN(name_len), collected); + sys_lchown(collected, uid, gid); + do_utime(collected, mtime); + state = SkipIt; + next_state = Reset; + return 0; +} + +static __initdata int (*actions[])(void) = { + [Start] = do_start, + [Collect] = do_collect, + [GotHeader] = do_header, + [SkipIt] = do_skip, + [GotName] = do_name, + [CopyFile] = do_copy, + [GotSymlink] = do_symlink, + [Reset] = do_reset, +}; + +static int __init write_buffer(char *buf, unsigned len) +{ + count = len; + victim = buf; + + while (!actions[state]()) + ; + return len - count; +} + + +static int __init flush_buffer(void *bufv, unsigned len) +{ + char *buf = (char *) bufv; + int written; + int origLen = len; + if (message) + return -1; + while ((written = write_buffer(buf, len)) < len && !message) { + char c = buf[written]; + if (c == '0') { + buf += written; + len -= written; + state = Start; + } else if (c == 0) { + buf += written; + len -= written; + state = Reset; + } else + error("junk in compressed archive"); + } + return origLen; +} + +static unsigned my_inptr; /* index of next byte to be processed in inbuf */ + +#include +#include +#include + +static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) +{ + int written; + dry_run = check_only; + header_buf = kmalloc(110, GFP_KERNEL); + symlink_buf = kmalloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1, GFP_KERNEL); + name_buf = kmalloc(N_ALIGN(PATH_MAX), GFP_KERNEL); + + if (!header_buf || !symlink_buf || !name_buf) + panic("can't allocate buffers"); + + state = Start; + this_header = 0; + message = NULL; + while (!message && len) { + loff_t saved_offset = this_header; + if (*buf == '0' && !(this_header & 3)) { + state = Start; + written = write_buffer(buf, len); + buf += written; + len -= written; + continue; + } + if (!*buf) { + buf++; + len--; + this_header++; + continue; + } + this_header = 0; + if (!gunzip(buf, len, NULL, flush_buffer, NULL, + &my_inptr, error) && + message == NULL) + goto ok; + +#ifdef CONFIG_RD_BZIP2 + message = NULL; /* Zero out message, or else cpio will + think an error has already occured */ + if (!bunzip2(buf, len, NULL, flush_buffer, NULL, + &my_inptr, error) && + message == NULL) { + goto ok; + } +#endif + +#ifdef CONFIG_RD_LZMA + message = NULL; /* Zero out message, or else cpio will + think an error has already occured */ + if (!unlzma(buf, len, NULL, flush_buffer, NULL, + &my_inptr, error) && + message == NULL) { + goto ok; + } +#endif +ok: + if (state != Reset) + error("junk in compressed archive"); + this_header = saved_offset + my_inptr; + buf += my_inptr; + len -= my_inptr; + } + dir_utime(); + kfree(name_buf); + kfree(symlink_buf); + kfree(header_buf); + return message; +} + +static int __initdata do_retain_initrd; + +static int __init retain_initrd_param(char *str) +{ + if (*str) + return 0; + do_retain_initrd = 1; + return 1; +} +__setup("retain_initrd", retain_initrd_param); + +extern char __initramfs_start[], __initramfs_end[]; +#include +#include + +static void __init free_initrd(void) +{ +#ifdef CONFIG_KEXEC + unsigned long crashk_start = (unsigned long)__va(crashk_res.start); + unsigned long crashk_end = (unsigned long)__va(crashk_res.end); +#endif + if (do_retain_initrd) + goto skip; + +#ifdef CONFIG_KEXEC + /* + * If the initrd region is overlapped with crashkernel reserved region, + * free only memory that is not part of crashkernel region. + */ + if (initrd_start < crashk_end && initrd_end > crashk_start) { + /* + * Initialize initrd memory region since the kexec boot does + * not do. + */ + memset((void *)initrd_start, 0, initrd_end - initrd_start); + if (initrd_start < crashk_start) + free_initrd_mem(initrd_start, crashk_start); + if (initrd_end > crashk_end) + free_initrd_mem(crashk_end, initrd_end); + } else +#endif + free_initrd_mem(initrd_start, initrd_end); +skip: + initrd_start = 0; + initrd_end = 0; +} + +static int __init populate_rootfs(void) +{ + char *err = unpack_to_rootfs(__initramfs_start, + __initramfs_end - __initramfs_start, 0); + if (err) + panic(err); + if (initrd_start) { +#ifdef CONFIG_BLK_DEV_RAM + int fd; + printk(KERN_INFO "checking if image is initramfs..."); + err = unpack_to_rootfs((char *)initrd_start, + initrd_end - initrd_start, 1); + if (!err) { + printk(" it is\n"); + unpack_to_rootfs((char *)initrd_start, + initrd_end - initrd_start, 0); + free_initrd(); + return 0; + } + printk("it isn't (%s); looks like an initrd\n", err); + fd = sys_open("/initrd.image", O_WRONLY|O_CREAT, 0700); + if (fd >= 0) { + sys_write(fd, (char *)initrd_start, + initrd_end - initrd_start); + sys_close(fd); + free_initrd(); + } +#else + printk(KERN_INFO "Unpacking initramfs..."); + err = unpack_to_rootfs((char *)initrd_start, + initrd_end - initrd_start, 0); + if (err) + panic(err); + printk(" done\n"); + free_initrd(); +#endif + } + return 0; +} +rootfs_initcall(populate_rootfs); diff --git a/init/main.c b/init/main.c new file mode 100644 index 0000000..7e117a2 --- /dev/null +++ b/init/main.c @@ -0,0 +1,888 @@ +/* + * linux/init/main.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * GK 2/5/95 - Changed to support mounting root fs via NFS + * Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96 + * Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96 + * Simplified starting of init: Michael A. Griffith + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef CONFIG_X86_LOCAL_APIC +#include +#endif + +/* + * This is one of the first .c files built. Error out early if we have compiler + * trouble. + */ + +#if __GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ == 0 +#warning gcc-4.1.0 is known to miscompile the kernel. A different compiler version is recommended. +#endif + +static int kernel_init(void *); + +extern void init_IRQ(void); +extern void fork_init(unsigned long); +extern void mca_init(void); +extern void sbus_init(void); +extern void prio_tree_init(void); +extern void radix_tree_init(void); +extern void free_initmem(void); +#ifdef CONFIG_ACPI +extern void acpi_early_init(void); +#else +static inline void acpi_early_init(void) { } +#endif +#ifndef CONFIG_DEBUG_RODATA +static inline void mark_rodata_ro(void) { } +#endif + +#ifdef CONFIG_TC +extern void tc_init(void); +#endif + +enum system_states system_state; +EXPORT_SYMBOL(system_state); + +/* + * Boot command-line arguments + */ +#define MAX_INIT_ARGS CONFIG_INIT_ENV_ARG_LIMIT +#define MAX_INIT_ENVS CONFIG_INIT_ENV_ARG_LIMIT + +extern void time_init(void); +/* Default late time init is NULL. archs can override this later. */ +void (*late_time_init)(void); +extern void softirq_init(void); + +/* Untouched command line saved by arch-specific code. */ +char __initdata boot_command_line[COMMAND_LINE_SIZE]; +/* Untouched saved command line (eg. for /proc) */ +char *saved_command_line; +/* Command line for parameter parsing */ +static char *static_command_line; + +static char *execute_command; +static char *ramdisk_execute_command; + +#ifdef CONFIG_SMP +/* Setup configured maximum number of CPUs to activate */ +unsigned int __initdata setup_max_cpus = NR_CPUS; + +/* + * Setup routine for controlling SMP activation + * + * Command-line option of "nosmp" or "maxcpus=0" will disable SMP + * activation entirely (the MPS table probe still happens, though). + * + * Command-line option of "maxcpus=", where is an integer + * greater than 0, limits the maximum number of CPUs activated in + * SMP mode to . + */ +#ifndef CONFIG_X86_IO_APIC +static inline void disable_ioapic_setup(void) {}; +#endif + +static int __init nosmp(char *str) +{ + setup_max_cpus = 0; + disable_ioapic_setup(); + return 0; +} + +early_param("nosmp", nosmp); + +static int __init maxcpus(char *str) +{ + get_option(&str, &setup_max_cpus); + if (setup_max_cpus == 0) + disable_ioapic_setup(); + + return 0; +} + +early_param("maxcpus", maxcpus); +#else +#define setup_max_cpus NR_CPUS +#endif + +/* + * If set, this is an indication to the drivers that reset the underlying + * device before going ahead with the initialization otherwise driver might + * rely on the BIOS and skip the reset operation. + * + * This is useful if kernel is booting in an unreliable environment. + * For ex. kdump situaiton where previous kernel has crashed, BIOS has been + * skipped and devices will be in unknown state. + */ +unsigned int reset_devices; +EXPORT_SYMBOL(reset_devices); + +static int __init set_reset_devices(char *str) +{ + reset_devices = 1; + return 1; +} + +__setup("reset_devices", set_reset_devices); + +static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; +char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; +static const char *panic_later, *panic_param; + +extern struct obs_kernel_param __setup_start[], __setup_end[]; + +static int __init obsolete_checksetup(char *line) +{ + struct obs_kernel_param *p; + int had_early_param = 0; + + p = __setup_start; + do { + int n = strlen(p->str); + if (!strncmp(line, p->str, n)) { + if (p->early) { + /* Already done in parse_early_param? + * (Needs exact match on param part). + * Keep iterating, as we can have early + * params and __setups of same names 8( */ + if (line[n] == '\0' || line[n] == '=') + had_early_param = 1; + } else if (!p->setup_func) { + printk(KERN_WARNING "Parameter %s is obsolete," + " ignored\n", p->str); + return 1; + } else if (p->setup_func(line + n)) + return 1; + } + p++; + } while (p < __setup_end); + + return had_early_param; +} + +/* + * This should be approx 2 Bo*oMips to start (note initial shift), and will + * still work even if initially too large, it will just take slightly longer + */ +unsigned long loops_per_jiffy = (1<<12); + +EXPORT_SYMBOL(loops_per_jiffy); + +static int __init debug_kernel(char *str) +{ + console_loglevel = 10; + return 0; +} + +static int __init quiet_kernel(char *str) +{ + console_loglevel = 4; + return 0; +} + +early_param("debug", debug_kernel); +early_param("quiet", quiet_kernel); + +static int __init loglevel(char *str) +{ + get_option(&str, &console_loglevel); + return 0; +} + +early_param("loglevel", loglevel); + +/* + * Unknown boot options get handed to init, unless they look like + * failed parameters + */ +static int __init unknown_bootoption(char *param, char *val) +{ + /* Change NUL term back to "=", to make "param" the whole string. */ + if (val) { + /* param=val or param="val"? */ + if (val == param+strlen(param)+1) + val[-1] = '='; + else if (val == param+strlen(param)+2) { + val[-2] = '='; + memmove(val-1, val, strlen(val)+1); + val--; + } else + BUG(); + } + + /* Handle obsolete-style parameters */ + if (obsolete_checksetup(param)) + return 0; + + /* + * Preemptive maintenance for "why didn't my misspelled command + * line work?" + */ + if (strchr(param, '.') && (!val || strchr(param, '.') < val)) { + printk(KERN_ERR "Unknown boot option `%s': ignoring\n", param); + return 0; + } + + if (panic_later) + return 0; + + if (val) { + /* Environment option */ + unsigned int i; + for (i = 0; envp_init[i]; i++) { + if (i == MAX_INIT_ENVS) { + panic_later = "Too many boot env vars at `%s'"; + panic_param = param; + } + if (!strncmp(param, envp_init[i], val - param)) + break; + } + envp_init[i] = param; + } else { + /* Command line option */ + unsigned int i; + for (i = 0; argv_init[i]; i++) { + if (i == MAX_INIT_ARGS) { + panic_later = "Too many boot init vars at `%s'"; + panic_param = param; + } + } + argv_init[i] = param; + } + return 0; +} + +#ifdef CONFIG_DEBUG_PAGEALLOC +int __read_mostly debug_pagealloc_enabled = 0; +#endif + +static int __init init_setup(char *str) +{ + unsigned int i; + + execute_command = str; + /* + * In case LILO is going to boot us with default command line, + * it prepends "auto" before the whole cmdline which makes + * the shell think it should execute a script with such name. + * So we ignore all arguments entered _before_ init=... [MJ] + */ + for (i = 1; i < MAX_INIT_ARGS; i++) + argv_init[i] = NULL; + return 1; +} +__setup("init=", init_setup); + +static int __init rdinit_setup(char *str) +{ + unsigned int i; + + ramdisk_execute_command = str; + /* See "auto" comment in init_setup */ + for (i = 1; i < MAX_INIT_ARGS; i++) + argv_init[i] = NULL; + return 1; +} +__setup("rdinit=", rdinit_setup); + +#ifndef CONFIG_SMP + +#ifdef CONFIG_X86_LOCAL_APIC +static void __init smp_init(void) +{ + APIC_init_uniprocessor(); +} +#else +#define smp_init() do { } while (0) +#endif + +static inline void setup_per_cpu_areas(void) { } +static inline void setup_nr_cpu_ids(void) { } +static inline void smp_prepare_cpus(unsigned int maxcpus) { } + +#else + +#if NR_CPUS > BITS_PER_LONG +cpumask_t cpu_mask_all __read_mostly = CPU_MASK_ALL; +EXPORT_SYMBOL(cpu_mask_all); +#endif + +/* Setup number of possible processor ids */ +int nr_cpu_ids __read_mostly = NR_CPUS; +EXPORT_SYMBOL(nr_cpu_ids); + +/* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */ +static void __init setup_nr_cpu_ids(void) +{ + int cpu, highest_cpu = 0; + + for_each_possible_cpu(cpu) + highest_cpu = cpu; + + nr_cpu_ids = highest_cpu + 1; +} + +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; + +EXPORT_SYMBOL(__per_cpu_offset); + +static void __init setup_per_cpu_areas(void) +{ + unsigned long size, i; + char *ptr; + unsigned long nr_possible_cpus = num_possible_cpus(); + + /* Copy section for each CPU (we discard the original) */ + size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE); + ptr = alloc_bootmem_pages(size * nr_possible_cpus); + + for_each_possible_cpu(i) { + __per_cpu_offset[i] = ptr - __per_cpu_start; + memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + ptr += size; + } +} +#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ + +/* Called by boot processor to activate the rest. */ +static void __init smp_init(void) +{ + unsigned int cpu; + + /* + * Set up the current CPU as possible to migrate to. + * The other ones will be done by cpu_up/cpu_down() + */ + cpu = smp_processor_id(); + cpu_set(cpu, cpu_active_map); + + /* FIXME: This should be done in userspace --RR */ + for_each_present_cpu(cpu) { + if (num_online_cpus() >= setup_max_cpus) + break; + if (!cpu_online(cpu)) + cpu_up(cpu); + } + + /* Any cleanup work */ + printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus()); + smp_cpus_done(setup_max_cpus); +} + +#endif + +/* + * We need to store the untouched command line for future reference. + * We also need to store the touched command line since the parameter + * parsing is performed in place, and we should allow a component to + * store reference of name/value for future reference. + */ +static void __init setup_command_line(char *command_line) +{ + saved_command_line = alloc_bootmem(strlen (boot_command_line)+1); + static_command_line = alloc_bootmem(strlen (command_line)+1); + strcpy (saved_command_line, boot_command_line); + strcpy (static_command_line, command_line); +} + +/* + * We need to finalize in a non-__init function or else race conditions + * between the root thread and the init thread may cause start_kernel to + * be reaped by free_initmem before the root thread has proceeded to + * cpu_idle. + * + * gcc-3.4 accidentally inlines this function, so use noinline. + */ + +static void noinline __init_refok rest_init(void) + __releases(kernel_lock) +{ + int pid; + + kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND); + numa_default_policy(); + pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); + kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns); + unlock_kernel(); + + /* + * The boot idle thread must execute schedule() + * at least once to get things moving: + */ + init_idle_bootup_task(current); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + + /* Call into cpu_idle with preempt disabled */ + cpu_idle(); +} + +/* Check for early params. */ +static int __init do_early_param(char *param, char *val) +{ + struct obs_kernel_param *p; + + for (p = __setup_start; p < __setup_end; p++) { + if ((p->early && strcmp(param, p->str) == 0) || + (strcmp(param, "console") == 0 && + strcmp(p->str, "earlycon") == 0) + ) { + if (p->setup_func(val) != 0) + printk(KERN_WARNING + "Malformed early option '%s'\n", param); + } + } + /* We accept everything at this stage. */ + return 0; +} + +/* Arch code calls this early on, or if not, just before other parsing. */ +void __init parse_early_param(void) +{ + static __initdata int done = 0; + static __initdata char tmp_cmdline[COMMAND_LINE_SIZE]; + + if (done) + return; + + /* All fall through to do_early_param. */ + strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE); + parse_args("early options", tmp_cmdline, NULL, 0, do_early_param); + done = 1; +} + +/* + * Activate the first processor. + */ + +static void __init boot_cpu_init(void) +{ + int cpu = smp_processor_id(); + /* Mark the boot cpu "present", "online" etc for SMP and UP case */ + cpu_set(cpu, cpu_online_map); + cpu_set(cpu, cpu_present_map); + cpu_set(cpu, cpu_possible_map); +} + +void __init __weak smp_setup_processor_id(void) +{ +} + +void __init __weak thread_info_cache_init(void) +{ +} + +asmlinkage void __init start_kernel(void) +{ + char * command_line; + extern struct kernel_param __start___param[], __stop___param[]; + + smp_setup_processor_id(); + + /* + * Need to run as early as possible, to initialize the + * lockdep hash: + */ + unwind_init(); + lockdep_init(); + debug_objects_early_init(); + cgroup_init_early(); + + local_irq_disable(); + early_boot_irqs_off(); + early_init_irq_lock_class(); + +/* + * Interrupts are still disabled. Do necessary setups, then + * enable them + */ + lock_kernel(); + tick_init(); + boot_cpu_init(); + page_address_init(); + printk(KERN_NOTICE); + printk(linux_banner); + setup_arch(&command_line); + mm_init_owner(&init_mm, &init_task); + setup_command_line(command_line); + unwind_setup(); + setup_per_cpu_areas(); + setup_nr_cpu_ids(); + smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ + + /* + * Set up the scheduler prior starting any interrupts (such as the + * timer interrupt). Full topology setup happens at smp_init() + * time - but meanwhile we still have a functioning scheduler. + */ + sched_init(); + /* + * Disable preemption - early bootup scheduling is extremely + * fragile until we cpu_idle() for the first time. + */ + preempt_disable(); + build_all_zonelists(); + page_alloc_init(); + printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); + parse_early_param(); + parse_args("Booting kernel", static_command_line, __start___param, + __stop___param - __start___param, + &unknown_bootoption); + if (!irqs_disabled()) { + printk(KERN_WARNING "start_kernel(): bug: interrupts were " + "enabled *very* early, fixing it\n"); + local_irq_disable(); + } + sort_main_extable(); + trap_init(); + rcu_init(); + init_IRQ(); + pidhash_init(); + init_timers(); + hrtimers_init(); + softirq_init(); + timekeeping_init(); + time_init(); + sched_clock_init(); + profile_init(); + if (!irqs_disabled()) + printk("start_kernel(): bug: interrupts were enabled early\n"); + early_boot_irqs_on(); + local_irq_enable(); + + /* + * HACK ALERT! This is early. We're enabling the console before + * we've done PCI setups etc, and console_init() must be aware of + * this. But we do want output early, in case something goes wrong. + */ + console_init(); + if (panic_later) + panic(panic_later, panic_param); + + lockdep_info(); + + /* + * Need to run this when irqs are enabled, because it wants + * to self-test [hard/soft]-irqs on/off lock inversion bugs + * too: + */ + locking_selftest(); + +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start && !initrd_below_start_ok && + page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) { + printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - " + "disabling it.\n", + page_to_pfn(virt_to_page((void *)initrd_start)), + min_low_pfn); + initrd_start = 0; + } +#endif + vmalloc_init(); + vfs_caches_init_early(); + cpuset_init_early(); + page_cgroup_init(); + mem_init(); + enable_debug_pagealloc(); + cpu_hotplug_init(); + kmem_cache_init(); + debug_objects_mem_init(); + idr_init_cache(); + setup_per_cpu_pageset(); + numa_policy_init(); + if (late_time_init) + late_time_init(); + calibrate_delay(); + pidmap_init(); + pgtable_cache_init(); + prio_tree_init(); + anon_vma_init(); +#ifdef CONFIG_X86 + if (efi_enabled) + efi_enter_virtual_mode(); +#endif + thread_info_cache_init(); + fork_init(num_physpages); + proc_caches_init(); + buffer_init(); + key_init(); + security_init(); + vfs_caches_init(num_physpages); + radix_tree_init(); + signals_init(); + /* rootfs populating might need page-writeback */ + page_writeback_init(); +#ifdef CONFIG_PROC_FS + proc_root_init(); +#endif + cgroup_init(); + cpuset_init(); + taskstats_init_early(); + delayacct_init(); + + check_bugs(); + + acpi_early_init(); /* before LAPIC and SMP init */ + + ftrace_init(); + + /* Do the rest non-__init'ed, we're now alive */ + rest_init(); +} + +static int initcall_debug; +core_param(initcall_debug, initcall_debug, bool, 0644); + +int do_one_initcall(initcall_t fn) +{ + int count = preempt_count(); + ktime_t delta; + char msgbuf[64]; + struct boot_trace it; + + if (initcall_debug) { + it.caller = task_pid_nr(current); + printk("calling %pF @ %i\n", fn, it.caller); + it.calltime = ktime_get(); + } + + it.result = fn(); + + if (initcall_debug) { + it.rettime = ktime_get(); + delta = ktime_sub(it.rettime, it.calltime); + it.duration = (unsigned long long) delta.tv64 >> 10; + printk("initcall %pF returned %d after %Ld usecs\n", fn, + it.result, it.duration); + trace_boot(&it, fn); + } + + msgbuf[0] = 0; + + if (it.result && it.result != -ENODEV && initcall_debug) + sprintf(msgbuf, "error code %d ", it.result); + + if (preempt_count() != count) { + strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf)); + preempt_count() = count; + } + if (irqs_disabled()) { + strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf)); + local_irq_enable(); + } + if (msgbuf[0]) { + printk("initcall %pF returned with %s\n", fn, msgbuf); + } + + return it.result; +} + + +extern initcall_t __initcall_start[], __initcall_end[], __early_initcall_end[]; + +static void __init do_initcalls(void) +{ + initcall_t *call; + + for (call = __early_initcall_end; call < __initcall_end; call++) + do_one_initcall(*call); + + /* Make sure there is no pending stuff from the initcall sequence */ + flush_scheduled_work(); +} + +/* + * Ok, the machine is now initialized. None of the devices + * have been touched yet, but the CPU subsystem is up and + * running, and memory and process management works. + * + * Now we can finally start doing some real work.. + */ +static void __init do_basic_setup(void) +{ + rcu_init_sched(); /* needed by module_init stage. */ + init_workqueues(); + usermodehelper_init(); + driver_init(); + init_irq_proc(); + do_initcalls(); +} + +static void __init do_pre_smp_initcalls(void) +{ + initcall_t *call; + + for (call = __initcall_start; call < __early_initcall_end; call++) + do_one_initcall(*call); +} + +static void run_init_process(char *init_filename) +{ + argv_init[0] = init_filename; + kernel_execve(init_filename, argv_init, envp_init); +} + +/* This is a non __init function. Force it to be noinline otherwise gcc + * makes it inline to init() and it becomes part of init.text section + */ +static int noinline init_post(void) +{ + free_initmem(); + unlock_kernel(); + mark_rodata_ro(); + system_state = SYSTEM_RUNNING; + numa_default_policy(); + + if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) + printk(KERN_WARNING "Warning: unable to open an initial console.\n"); + + (void) sys_dup(0); + (void) sys_dup(0); + + current->signal->flags |= SIGNAL_UNKILLABLE; + + if (ramdisk_execute_command) { + run_init_process(ramdisk_execute_command); + printk(KERN_WARNING "Failed to execute %s\n", + ramdisk_execute_command); + } + + /* + * We try each of these until one succeeds. + * + * The Bourne shell can be used instead of init if we are + * trying to recover a really broken machine. + */ + if (execute_command) { + run_init_process(execute_command); + printk(KERN_WARNING "Failed to execute %s. Attempting " + "defaults...\n", execute_command); + } + run_init_process("/sbin/init"); + run_init_process("/etc/init"); + run_init_process("/bin/init"); + run_init_process("/bin/sh"); + + panic("No init found. Try passing init= option to kernel."); +} + +static int __init kernel_init(void * unused) +{ + lock_kernel(); + /* + * init can run on any cpu. + */ + set_cpus_allowed_ptr(current, CPU_MASK_ALL_PTR); + /* + * Tell the world that we're going to be the grim + * reaper of innocent orphaned children. + * + * We don't want people to have to make incorrect + * assumptions about where in the task array this + * can be found. + */ + init_pid_ns.child_reaper = current; + + cad_pid = task_pid(current); + + smp_prepare_cpus(setup_max_cpus); + + do_pre_smp_initcalls(); + start_boot_trace(); + + smp_init(); + sched_init_smp(); + + cpuset_init_smp(); + + do_basic_setup(); + + /* + * check if there is an early userspace init. If yes, let it do all + * the work + */ + + if (!ramdisk_execute_command) + ramdisk_execute_command = "/init"; + + if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) { + ramdisk_execute_command = NULL; + prepare_namespace(); + } + + /* + * Ok, we have completed the initial bootup, and + * we're essentially up and running. Get rid of the + * initmem segments and start the user-mode stuff.. + */ + stop_boot_trace(); + init_post(); + return 0; +} diff --git a/init/noinitramfs.c b/init/noinitramfs.c new file mode 100644 index 0000000..f4c1a3a --- /dev/null +++ b/init/noinitramfs.c @@ -0,0 +1,52 @@ +/* + * init/noinitramfs.c + * + * Copyright (C) 2006, NXP Semiconductors, All Rights Reserved + * Author: Jean-Paul Saman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include + +/* + * Create a simple rootfs that is similar to the default initramfs + */ +static int __init default_rootfs(void) +{ + int err; + + err = sys_mkdir("/dev", 0755); + if (err < 0) + goto out; + + err = sys_mknod((const char __user *) "/dev/console", + S_IFCHR | S_IRUSR | S_IWUSR, + new_encode_dev(MKDEV(5, 1))); + if (err < 0) + goto out; + + err = sys_mkdir("/root", 0700); + if (err < 0) + goto out; + + return 0; + +out: + printk(KERN_WARNING "Failed to create a rootfs\n"); + return err; +} +rootfs_initcall(default_rootfs); diff --git a/init/version.c b/init/version.c new file mode 100644 index 0000000..52a8b98 --- /dev/null +++ b/init/version.c @@ -0,0 +1,47 @@ +/* + * linux/init/version.c + * + * Copyright (C) 1992 Theodore Ts'o + * + * May be freely distributed as part of Linux. + */ + +#include +#include +#include +#include +#include +#include + +#ifndef CONFIG_KALLSYMS +#define version(a) Version_ ## a +#define version_string(a) version(a) + +extern int version_string(LINUX_VERSION_CODE); +int version_string(LINUX_VERSION_CODE); +#endif + +struct uts_namespace init_uts_ns = { + .kref = { + .refcount = ATOMIC_INIT(2), + }, + .name = { + .sysname = UTS_SYSNAME, + .nodename = UTS_NODENAME, + .release = UTS_RELEASE, + .version = UTS_VERSION, + .machine = UTS_MACHINE, + .domainname = UTS_DOMAINNAME, + }, +}; +EXPORT_SYMBOL_GPL(init_uts_ns); + +/* FIXED STRINGS! Don't touch! */ +const char linux_banner[] = + "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@" + LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n"; + +const char linux_proc_banner[] = + "%s version %s" + " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ")" + " (" LINUX_COMPILER ") %s\n"; -- cgit v1.1