diff options
Diffstat (limited to 'init')
-rw-r--r-- | init/Kconfig | 181 | ||||
-rw-r--r-- | init/do_mounts.c | 1 | ||||
-rw-r--r-- | init/do_mounts_initrd.c | 11 | ||||
-rw-r--r-- | init/do_mounts_rd.c | 1 | ||||
-rw-r--r-- | init/initramfs.c | 15 | ||||
-rw-r--r-- | init/main.c | 137 |
6 files changed, 182 insertions, 164 deletions
diff --git a/init/Kconfig b/init/Kconfig index 313506d8..2de5b1c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -320,13 +320,17 @@ config AUDITSYSCALL help Enable low-overhead system-call auditing infrastructure that can be used independently or with another kernel subsystem, - such as SELinux. To use audit's filesystem watch feature, please - ensure that INOTIFY is configured. + such as SELinux. + +config AUDIT_WATCH + def_bool y + depends on AUDITSYSCALL + select FSNOTIFY config AUDIT_TREE def_bool y depends on AUDITSYSCALL - select INOTIFY + select FSNOTIFY menu "RCU Subsystem" @@ -404,6 +408,22 @@ config RCU_FANOUT_EXACT Say N if unsure. +config RCU_FAST_NO_HZ + bool "Accelerate last non-dyntick-idle CPU's grace periods" + depends on TREE_RCU && NO_HZ && SMP + default n + help + This option causes RCU to attempt to accelerate grace periods + in order to allow the final CPU to enter dynticks-idle state + more quickly. On the other hand, this option increases the + overhead of the dynticks-idle checking, particularly on systems + with large numbers of CPUs. + + Say Y if energy efficiency is critically important, particularly + if you have relatively few CPUs. + + Say N if you are unsure. + config TREE_RCU_TRACE def_bool RCU_TRACE && ( TREE_RCU || TREE_PREEMPT_RCU ) select DEBUG_FS @@ -453,59 +473,9 @@ config LOG_BUF_SHIFT config HAVE_UNSTABLE_SCHED_CLOCK bool -config GROUP_SCHED - bool "Group CPU scheduler" - depends on EXPERIMENTAL - default n - help - This feature lets CPU scheduler recognize task groups and control CPU - bandwidth allocation to such task groups. - In order to create a group from arbitrary set of processes, use - CONFIG_CGROUPS. (See Control Group support.) - -config FAIR_GROUP_SCHED - bool "Group scheduling for SCHED_OTHER" - depends on GROUP_SCHED - default GROUP_SCHED - -config RT_GROUP_SCHED - bool "Group scheduling for SCHED_RR/FIFO" - depends on EXPERIMENTAL - depends on GROUP_SCHED - default n - help - This feature lets you explicitly allocate real CPU bandwidth - to users or control groups (depending on the "Basis for grouping tasks" - setting below. If enabled, it will also make it impossible to - schedule realtime tasks for non-root users until you allocate - realtime bandwidth for them. - See Documentation/scheduler/sched-rt-group.txt for more information. - -choice - depends on GROUP_SCHED - prompt "Basis for grouping tasks" - default USER_SCHED - -config USER_SCHED - bool "user id" - help - This option will choose userid as the basis for grouping - tasks, thus providing equal CPU bandwidth to each user. - -config CGROUP_SCHED - bool "Control groups" - depends on CGROUPS - help - This option allows you to create arbitrary task groups - using the "cgroup" pseudo filesystem and control - the cpu bandwidth allocated to each such task group. - Refer to Documentation/cgroups/cgroups.txt for more - information on "cgroup" pseudo filesystem. - -endchoice - menuconfig CGROUPS boolean "Control Group support" + depends on EVENTFD help This option adds support for grouping sets of processes together, for use with process control subsystems such as Cpusets, CFS, memory @@ -607,8 +577,8 @@ config CGROUP_MEM_RES_CTLR could in turn add some fork/exit overhead. config CGROUP_MEM_RES_CTLR_SWAP - bool "Memory Resource Controller Swap Extension(EXPERIMENTAL)" - depends on CGROUP_MEM_RES_CTLR && SWAP && EXPERIMENTAL + bool "Memory Resource Controller Swap Extension" + depends on CGROUP_MEM_RES_CTLR && SWAP help Add swap management feature to memory resource controller. When you enable this, you can limit mem+swap usage per cgroup. In other words, @@ -624,6 +594,62 @@ config CGROUP_MEM_RES_CTLR_SWAP Now, memory usage of swap_cgroup is 2 bytes per entry. If swap page size is 4096bytes, 512k per 1Gbytes of swap. +menuconfig CGROUP_SCHED + bool "Group CPU scheduler" + depends on EXPERIMENTAL && CGROUPS + default n + help + This feature lets CPU scheduler recognize task groups and control CPU + bandwidth allocation to such task groups. It uses cgroups to group + tasks. + +if CGROUP_SCHED +config FAIR_GROUP_SCHED + bool "Group scheduling for SCHED_OTHER" + depends on CGROUP_SCHED + default CGROUP_SCHED + +config RT_GROUP_SCHED + bool "Group scheduling for SCHED_RR/FIFO" + depends on EXPERIMENTAL + depends on CGROUP_SCHED + default n + help + This feature lets you explicitly allocate real CPU bandwidth + to task groups. If enabled, it will also make it impossible to + schedule realtime tasks for non-root users until you allocate + realtime bandwidth for them. + See Documentation/scheduler/sched-rt-group.txt for more information. + +endif #CGROUP_SCHED + +config BLK_CGROUP + tristate "Block IO controller" + depends on CGROUPS && BLOCK + default n + ---help--- + Generic block IO controller cgroup interface. This is the common + cgroup interface which should be used by various IO controlling + policies. + + Currently, CFQ IO scheduler uses it to recognize task groups and + control disk bandwidth allocation (proportional time slice allocation) + to such task groups. + + This option only enables generic Block IO controller infrastructure. + One needs to also enable actual IO controlling logic in CFQ for it + to take effect. (CONFIG_CFQ_GROUP_IOSCHED=y). + + See Documentation/cgroups/blkio-controller.txt for more information. + +config DEBUG_BLK_CGROUP + bool "Enable Block IO controller debugging" + depends on BLK_CGROUP + default n + ---help--- + Enable some debugging help. Currently it exports additional stat + files in a cgroup which can be useful for debugging. + endif # CGROUPS config MM_OWNER @@ -984,19 +1010,6 @@ config PERF_EVENTS Say Y if unsure. -config EVENT_PROFILE - bool "Tracepoint profiling sources" - depends on PERF_EVENTS && EVENT_TRACING - default y - help - Allow the use of tracepoints as software performance events. - - When this is enabled, you can create perf events based on - tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID - found in debugfs://tracing/events/*/*/id. (The -e/--events - option to the perf tool can parse and interpret symbolic - tracepoints, in the subsystem:tracepoint_name format.) - config PERF_COUNTERS bool "Kernel performance counters (old config option)" depends on HAVE_PERF_EVENTS @@ -1120,7 +1133,7 @@ config MMAP_ALLOW_UNINITIALIZED See Documentation/nommu-mmap.txt for more information. config PROFILING - bool "Profiling support (EXPERIMENTAL)" + bool "Profiling support" help Say Y here to enable the extended profiling support mechanisms used by profilers such as OProfile. @@ -1134,30 +1147,6 @@ config TRACEPOINTS source "arch/Kconfig" -config SLOW_WORK - default n - bool - help - The slow work thread pool provides a number of dynamically allocated - threads that can be used by the kernel to perform operations that - take a relatively long time. - - An example of this would be CacheFiles doing a path lookup followed - by a series of mkdirs and a create call, all of which have to touch - disk. - - See Documentation/slow-work.txt. - -config SLOW_WORK_DEBUG - bool "Slow work debugging through debugfs" - default n - depends on SLOW_WORK && DEBUG_FS - help - Display the contents of the slow work run queue through debugfs, - including items currently executing. - - See Documentation/slow-work.txt. - endmenu # General setup config HAVE_GENERIC_DMA_COHERENT @@ -1270,4 +1259,8 @@ source "block/Kconfig" config PREEMPT_NOTIFIERS bool +config PADATA + depends on SMP + bool + source "kernel/Kconfig.locks" diff --git a/init/do_mounts.c b/init/do_mounts.c index bb008d0..02e3ca4 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -15,6 +15,7 @@ #include <linux/initrd.h> #include <linux/async.h> #include <linux/fs_struct.h> +#include <linux/slab.h> #include <linux/nfs_fs.h> #include <linux/nfs_fs_sb.h> diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 614241b..3098a38 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -24,17 +24,14 @@ static int __init no_initrd(char *str) __setup("noinitrd", no_initrd); -static int __init do_linuxrc(void * shell) +static int __init do_linuxrc(void *_shell) { - static char *argv[] = { "linuxrc", NULL, }; - extern char * envp_init[]; + static const char *argv[] = { "linuxrc", NULL, }; + extern const char *envp_init[]; + const char *shell = _shell; sys_close(old_fd);sys_close(root_fd); - sys_close(0);sys_close(1);sys_close(2); sys_setsid(); - (void) sys_open("/dev/console",O_RDWR,0); - (void) sys_dup(0); - (void) sys_dup(0); return kernel_execve(shell, argv, envp_init); } diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index 027a402..bf3ef66 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -7,6 +7,7 @@ #include <linux/cramfs_fs.h> #include <linux/initrd.h> #include <linux/string.h> +#include <linux/slab.h> #include "do_mounts.h" #include "../fs/squashfs/squashfs_fs.h" diff --git a/init/initramfs.c b/init/initramfs.c index b37d34b..4b9c202 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -457,7 +457,8 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len) compress_name); message = msg_buf; } - } + } else + error("junk in compressed archive"); if (state != Reset) error("junk in compressed archive"); this_header = saved_offset + my_inptr; @@ -525,7 +526,7 @@ static void __init clean_rootfs(void) int fd; void *buf; struct linux_dirent64 *dirp; - int count; + int num; fd = sys_open("/", O_RDONLY, 0); WARN_ON(fd < 0); @@ -539,9 +540,9 @@ static void __init clean_rootfs(void) } dirp = buf; - count = sys_getdents64(fd, dirp, BUF_SIZE); - while (count > 0) { - while (count > 0) { + num = sys_getdents64(fd, dirp, BUF_SIZE); + while (num > 0) { + while (num > 0) { struct stat st; int ret; @@ -554,12 +555,12 @@ static void __init clean_rootfs(void) sys_unlink(dirp->d_name); } - count -= dirp->d_reclen; + num -= dirp->d_reclen; dirp = (void *)dirp + dirp->d_reclen; } dirp = buf; memset(buf, 0, BUF_SIZE); - count = sys_getdents64(fd, dirp, BUF_SIZE); + num = sys_getdents64(fd, dirp, BUF_SIZE); } sys_close(fd); diff --git a/init/main.c b/init/main.c index dac44a9..94ab488 100644 --- a/init/main.c +++ b/init/main.c @@ -25,7 +25,6 @@ #include <linux/bootmem.h> #include <linux/acpi.h> #include <linux/tty.h> -#include <linux/gfp.h> #include <linux/percpu.h> #include <linux/kmod.h> #include <linux/vmalloc.h> @@ -33,7 +32,6 @@ #include <linux/start_kernel.h> #include <linux/security.h> #include <linux/smp.h> -#include <linux/workqueue.h> #include <linux/profile.h> #include <linux/rcupdate.h> #include <linux/moduleparam.h> @@ -63,13 +61,13 @@ #include <linux/sched.h> #include <linux/signal.h> #include <linux/idr.h> +#include <linux/kgdb.h> #include <linux/ftrace.h> #include <linux/async.h> #include <linux/kmemcheck.h> -#include <linux/kmemtrace.h> #include <linux/sfi.h> #include <linux/shmem_fs.h> -#include <trace/boot.h> +#include <linux/slab.h> #include <asm/io.h> #include <asm/bugs.h> @@ -124,7 +122,9 @@ static char *ramdisk_execute_command; #ifdef CONFIG_SMP /* Setup configured maximum number of CPUs to activate */ -unsigned int __initdata setup_max_cpus = NR_CPUS; +unsigned int setup_max_cpus = NR_CPUS; +EXPORT_SYMBOL(setup_max_cpus); + /* * Setup routine for controlling SMP activation @@ -149,6 +149,20 @@ static int __init nosmp(char *str) early_param("nosmp", nosmp); +/* this is hard limit */ +static int __init nrcpus(char *str) +{ + int nr_cpus; + + get_option(&str, &nr_cpus); + if (nr_cpus > 0 && nr_cpus < nr_cpu_ids) + nr_cpu_ids = nr_cpus; + + return 0; +} + +early_param("nr_cpus", nrcpus); + static int __init maxcpus(char *str) { get_option(&str, &setup_max_cpus); @@ -160,7 +174,7 @@ static int __init maxcpus(char *str) early_param("maxcpus", maxcpus); #else -const unsigned int setup_max_cpus = NR_CPUS; +static const unsigned int setup_max_cpus = NR_CPUS; #endif /* @@ -183,15 +197,15 @@ static int __init set_reset_devices(char *str) __setup("reset_devices", set_reset_devices); -static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; -char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; +static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; +const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; static const char *panic_later, *panic_param; -extern struct obs_kernel_param __setup_start[], __setup_end[]; +extern const struct obs_kernel_param __setup_start[], __setup_end[]; static int __init obsolete_checksetup(char *line) { - struct obs_kernel_param *p; + const struct obs_kernel_param *p; int had_early_param = 0; p = __setup_start; @@ -407,17 +421,26 @@ static void __init setup_command_line(char *command_line) * gcc-3.4 accidentally inlines this function, so use noinline. */ +static __initdata DECLARE_COMPLETION(kthreadd_done); + static noinline void __init_refok rest_init(void) __releases(kernel_lock) { int pid; rcu_scheduler_starting(); + /* + * We need to spawn init first so that it obtains pid 1, however + * the init task will end up wanting to create kthreads, which, if + * we schedule it before we create kthreadd, will OOPS. + */ kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND); numa_default_policy(); pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); + rcu_read_lock(); kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns); - unlock_kernel(); + rcu_read_unlock(); + complete(&kthreadd_done); /* * The boot idle thread must execute schedule() @@ -435,7 +458,7 @@ static noinline void __init_refok rest_init(void) /* Check for early params. */ static int __init do_early_param(char *param, char *val) { - struct obs_kernel_param *p; + const struct obs_kernel_param *p; for (p = __setup_start; p < __setup_end; p++) { if ((p->early && strcmp(param, p->str) == 0) || @@ -505,6 +528,7 @@ static void __init mm_init(void) page_cgroup_init_flatmem(); mem_init(); kmem_cache_init(); + percpu_init_late(); pgtable_cache_init(); vmalloc_init(); } @@ -512,7 +536,7 @@ static void __init mm_init(void) asmlinkage void __init start_kernel(void) { char * command_line; - extern struct kernel_param __start___param[], __stop___param[]; + extern const struct kernel_param __start___param[], __stop___param[]; smp_setup_processor_id(); @@ -538,7 +562,6 @@ asmlinkage void __init start_kernel(void) * Interrupts are still disabled. Do necessary setups, then * enable them */ - lock_kernel(); tick_init(); boot_cpu_init(); page_address_init(); @@ -550,7 +573,7 @@ asmlinkage void __init start_kernel(void) setup_per_cpu_areas(); smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ - build_all_zonelists(); + build_all_zonelists(NULL); page_alloc_init(); printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); @@ -584,6 +607,7 @@ asmlinkage void __init start_kernel(void) local_irq_disable(); } rcu_init(); + radix_tree_init(); /* init some links before init_ISA_irqs() */ early_irq_init(); init_IRQ(); @@ -601,7 +625,7 @@ asmlinkage void __init start_kernel(void) local_irq_enable(); /* Interrupts are enabled now so all GFP allocations are safe. */ - set_gfp_allowed_mask(__GFP_BITS_MASK); + gfp_allowed_mask = __GFP_BITS_MASK; kmem_cache_init_late(); @@ -635,7 +659,6 @@ asmlinkage void __init start_kernel(void) #endif page_cgroup_init(); enable_debug_pagealloc(); - kmemtrace_init(); kmemleak_init(); debug_objects_mem_init(); idr_init_cache(); @@ -658,8 +681,8 @@ asmlinkage void __init start_kernel(void) buffer_init(); key_init(); security_init(); + dbg_late_init(); vfs_caches_init(totalram_pages); - radix_tree_init(); signals_init(); /* rootfs populating might need page-writeback */ page_writeback_init(); @@ -697,38 +720,39 @@ int initcall_debug; core_param(initcall_debug, initcall_debug, bool, 0644); static char msgbuf[64]; -static struct boot_trace_call call; -static struct boot_trace_ret ret; -int do_one_initcall(initcall_t fn) +static int __init_or_module do_one_initcall_debug(initcall_t fn) { - int count = preempt_count(); ktime_t calltime, delta, rettime; + unsigned long long duration; + int ret; + + printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current)); + calltime = ktime_get(); + ret = fn(); + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + duration = (unsigned long long) ktime_to_ns(delta) >> 10; + printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n", fn, + ret, duration); + + return ret; +} - if (initcall_debug) { - call.caller = task_pid_nr(current); - printk("calling %pF @ %i\n", fn, call.caller); - calltime = ktime_get(); - trace_boot_call(&call, fn); - enable_boot_trace(); - } - - ret.result = fn(); +int __init_or_module do_one_initcall(initcall_t fn) +{ + int count = preempt_count(); + int ret; - if (initcall_debug) { - disable_boot_trace(); - rettime = ktime_get(); - delta = ktime_sub(rettime, calltime); - ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10; - trace_boot_ret(&ret, fn); - printk("initcall %pF returned %d after %Ld usecs\n", fn, - ret.result, ret.duration); - } + if (initcall_debug) + ret = do_one_initcall_debug(fn); + else + ret = fn(); msgbuf[0] = 0; - if (ret.result && ret.result != -ENODEV && initcall_debug) - sprintf(msgbuf, "error code %d ", ret.result); + if (ret && ret != -ENODEV && initcall_debug) + sprintf(msgbuf, "error code %d ", ret); if (preempt_count() != count) { strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf)); @@ -742,7 +766,7 @@ int do_one_initcall(initcall_t fn) printk("initcall %pF returned with %s\n", fn, msgbuf); } - return ret.result; + return ret; } @@ -768,7 +792,6 @@ static void __init do_initcalls(void) */ static void __init do_basic_setup(void) { - init_workqueues(); cpuset_init_smp(); usermodehelper_init(); init_tmpfs(); @@ -786,7 +809,7 @@ static void __init do_pre_smp_initcalls(void) do_one_initcall(*fn); } -static void run_init_process(char *init_filename) +static void run_init_process(const char *init_filename) { argv_init[0] = init_filename; kernel_execve(init_filename, argv_init, envp_init); @@ -801,16 +824,10 @@ static noinline int init_post(void) /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); free_initmem(); - unlock_kernel(); mark_rodata_ro(); system_state = SYSTEM_RUNNING; numa_default_policy(); - if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) - printk(KERN_WARNING "Warning: unable to open an initial console.\n"); - - (void) sys_dup(0); - (void) sys_dup(0); current->signal->flags |= SIGNAL_UNKILLABLE; @@ -836,17 +853,20 @@ static noinline int init_post(void) run_init_process("/bin/init"); run_init_process("/bin/sh"); - panic("No init found. Try passing init= option to kernel."); + panic("No init found. Try passing init= option to kernel. " + "See Linux Documentation/init.txt for guidance."); } static int __init kernel_init(void * unused) { - lock_kernel(); - + /* + * Wait until kthreadd is all set-up. + */ + wait_for_completion(&kthreadd_done); /* * init can allocate pages on any node */ - set_mems_allowed(node_possible_map); + set_mems_allowed(node_states[N_HIGH_MEMORY]); /* * init can run on any cpu. */ @@ -866,13 +886,18 @@ static int __init kernel_init(void * unused) smp_prepare_cpus(setup_max_cpus); do_pre_smp_initcalls(); - start_boot_trace(); smp_init(); sched_init_smp(); do_basic_setup(); + /* Open the /dev/console on the rootfs, this should never fail */ + if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) + printk(KERN_WARNING "Warning: unable to open an initial console.\n"); + + (void) sys_dup(0); + (void) sys_dup(0); /* * check if there is an early userspace init. If yes, let it do all * the work |