summaryrefslogtreecommitdiffstats
path: root/init
diff options
context:
space:
mode:
authorLen Brown <len.brown@intel.com>2009-01-09 03:39:43 -0500
committerLen Brown <len.brown@intel.com>2009-01-09 03:39:43 -0500
commitb2576e1d4408e134e2188c967b1f28af39cd79d4 (patch)
tree004f3c82faab760f304ce031d6d2f572e7746a50 /init
parent3cc8a5f4ba91f67bbdb81a43a99281a26aab8d77 (diff)
parent2150edc6c5cf00f7adb54538b9ea2a3e9cedca3f (diff)
downloadop-kernel-dev-b2576e1d4408e134e2188c967b1f28af39cd79d4.zip
op-kernel-dev-b2576e1d4408e134e2188c967b1f28af39cd79d4.tar.gz
Merge branch 'linus' into release
Diffstat (limited to 'init')
-rw-r--r--init/Kconfig299
-rw-r--r--init/do_mounts.c6
-rw-r--r--init/do_mounts_md.c2
-rw-r--r--init/main.c77
4 files changed, 253 insertions, 131 deletions
diff --git a/init/Kconfig b/init/Kconfig
index f763762..a724a14 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -271,59 +271,6 @@ config LOG_BUF_SHIFT
13 => 8 KB
12 => 4 KB
-config CGROUPS
- bool "Control Group support"
- help
- This option will let you use process cgroup subsystems
- such as Cpusets
-
- Say N if unsure.
-
-config CGROUP_DEBUG
- bool "Example debug cgroup subsystem"
- depends on CGROUPS
- default n
- help
- This option enables a simple cgroup subsystem that
- exports useful debugging information about the cgroups
- framework
-
- Say N if unsure
-
-config CGROUP_NS
- bool "Namespace cgroup subsystem"
- depends on CGROUPS
- help
- Provides a simple namespace cgroup subsystem to
- provide hierarchical naming of sets of namespaces,
- for instance virtual servers and checkpoint/restart
- jobs.
-
-config CGROUP_FREEZER
- bool "control group freezer subsystem"
- depends on CGROUPS
- help
- Provides a way to freeze and unfreeze all tasks in a
- cgroup.
-
-config CGROUP_DEVICE
- bool "Device controller for cgroups"
- depends on CGROUPS && EXPERIMENTAL
- help
- Provides a cgroup implementing whitelists for devices which
- a process in the cgroup can mknod or open.
-
-config CPUSETS
- bool "Cpuset support"
- depends on SMP && CGROUPS
- help
- This option will let you create and manage CPUSETs which
- allow dynamically partitioning a system into sets of CPUs and
- Memory Nodes and assigning tasks to run only within those sets.
- This is primarily useful on large SMP or NUMA systems.
-
- Say N if unsure.
-
#
# Architectures with an unreliable sched_clock() should select this:
#
@@ -337,6 +284,8 @@ config GROUP_SCHED
help
This feature lets CPU scheduler recognize task groups and control CPU
bandwidth allocation to such task groups.
+ In order to create a group from arbitrary set of processes, use
+ CONFIG_CGROUPS. (See Control Group support.)
config FAIR_GROUP_SCHED
bool "Group scheduling for SCHED_OTHER"
@@ -379,6 +328,66 @@ config CGROUP_SCHED
endchoice
+menu "Control Group support"
+config CGROUPS
+ bool "Control Group support"
+ help
+ This option add support for grouping sets of processes together, for
+ use with process control subsystems such as Cpusets, CFS, memory
+ controls or device isolation.
+ See
+ - Documentation/cpusets.txt (Cpusets)
+ - Documentation/scheduler/sched-design-CFS.txt (CFS)
+ - Documentation/cgroups/ (features for grouping, isolation)
+ - Documentation/controllers/ (features for resource control)
+
+ Say N if unsure.
+
+config CGROUP_DEBUG
+ bool "Example debug cgroup subsystem"
+ depends on CGROUPS
+ default n
+ help
+ This option enables a simple cgroup subsystem that
+ exports useful debugging information about the cgroups
+ framework
+
+ Say N if unsure
+
+config CGROUP_NS
+ bool "Namespace cgroup subsystem"
+ depends on CGROUPS
+ help
+ Provides a simple namespace cgroup subsystem to
+ provide hierarchical naming of sets of namespaces,
+ for instance virtual servers and checkpoint/restart
+ jobs.
+
+config CGROUP_FREEZER
+ bool "control group freezer subsystem"
+ depends on CGROUPS
+ help
+ Provides a way to freeze and unfreeze all tasks in a
+ cgroup.
+
+config CGROUP_DEVICE
+ bool "Device controller for cgroups"
+ depends on CGROUPS && EXPERIMENTAL
+ help
+ Provides a cgroup implementing whitelists for devices which
+ a process in the cgroup can mknod or open.
+
+config CPUSETS
+ bool "Cpuset support"
+ depends on SMP && CGROUPS
+ help
+ This option will let you create and manage CPUSETs which
+ allow dynamically partitioning a system into sets of CPUs and
+ Memory Nodes and assigning tasks to run only within those sets.
+ This is primarily useful on large SMP or NUMA systems.
+
+ Say N if unsure.
+
config CGROUP_CPUACCT
bool "Simple CPU accounting cgroup subsystem"
depends on CGROUPS
@@ -393,9 +402,6 @@ config RESOURCE_COUNTERS
infrastructure that works with cgroups
depends on CGROUPS
-config MM_OWNER
- bool
-
config CGROUP_MEM_RES_CTLR
bool "Memory Resource Controller for Control Groups"
depends on CGROUPS && RESOURCE_COUNTERS
@@ -414,36 +420,68 @@ config CGROUP_MEM_RES_CTLR
sure you need the memory resource controller. Even when you enable
this, you can set "cgroup_disable=memory" at your boot option to
disable memory resource controller and you can avoid overheads.
- (and lose benefits of memory resource contoller)
+ (and lose benefits of memory resource controller)
This config option also selects MM_OWNER config option, which
could in turn add some fork/exit overhead.
+config MM_OWNER
+ bool
+
+config CGROUP_MEM_RES_CTLR_SWAP
+ bool "Memory Resource Controller Swap Extension(EXPERIMENTAL)"
+ depends on CGROUP_MEM_RES_CTLR && SWAP && EXPERIMENTAL
+ help
+ Add swap management feature to memory resource controller. When you
+ enable this, you can limit mem+swap usage per cgroup. In other words,
+ when you disable this, memory resource controller has no cares to
+ usage of swap...a process can exhaust all of the swap. This extension
+ is useful when you want to avoid exhaustion swap but this itself
+ adds more overheads and consumes memory for remembering information.
+ Especially if you use 32bit system or small memory system, please
+ be careful about enabling this. When memory resource controller
+ is disabled by boot option, this will be automatically disabled and
+ there will be no overhead from this. Even when you set this config=y,
+ if boot option "noswapaccount" is set, swap will not be accounted.
+
+
+endmenu
+
config SYSFS_DEPRECATED
bool
config SYSFS_DEPRECATED_V2
- bool "Create deprecated sysfs files"
+ bool "Create deprecated sysfs layout for older userspace tools"
depends on SYSFS
default y
select SYSFS_DEPRECATED
help
- This option creates deprecated symlinks such as the
- "device"-link, the <subsystem>:<name>-link, and the
- "bus"-link. It may also add deprecated key in the
- uevent environment.
- None of these features or values should be used today, as
- they export driver core implementation details to userspace
- or export properties which can't be kept stable across kernel
- releases.
-
- If enabled, this option will also move any device structures
- that belong to a class, back into the /sys/class hierarchy, in
- order to support older versions of udev and some userspace
- programs.
-
- If you are using a distro with the most recent userspace
- packages, it should be safe to say N here.
+ This option switches the layout of sysfs to the deprecated
+ version.
+
+ The current sysfs layout features a unified device tree at
+ /sys/devices/, which is able to express a hierarchy between
+ class devices. If the deprecated option is set to Y, the
+ unified device tree is split into a bus device tree at
+ /sys/devices/ and several individual class device trees at
+ /sys/class/. The class and bus devices will be connected by
+ "<subsystem>:<name>" and the "device" links. The "block"
+ class devices, will not show up in /sys/class/block/. Some
+ subsystems will suppress the creation of some devices which
+ depend on the unified device tree.
+
+ This option is not a pure compatibility option that can
+ be safely enabled on newer distributions. It will change the
+ layout of sysfs to the non-extensible deprecated version,
+ and disable some features, which can not be exported without
+ confusing older userspace tools. Since 2007/2008 all major
+ distributions do not enable this option, and ship no tools which
+ depend on the deprecated layout or this option.
+
+ If you are using a new kernel on an older distribution, or use
+ older userspace tools, you might need to say Y here. Do not say Y,
+ if the original kernel, that came with your distribution, has
+ this option set to N.
config PROC_PID_CPUSET
bool "Include legacy /proc/<pid>/cpuset file"
@@ -588,6 +626,13 @@ config KALLSYMS_ALL
Say N.
+config KALLSYMS_STRIP_GENERATED
+ bool "Strip machine generated symbols from kallsyms"
+ depends on KALLSYMS_ALL
+ default y
+ help
+ Say N if you want kallsyms to retain even machine generated symbols.
+
config KALLSYMS_EXTRA_PASS
bool "Do an extra kallsyms pass"
depends on KALLSYMS
@@ -808,6 +853,7 @@ config TRACEPOINTS
config MARKERS
bool "Activate markers"
+ depends on TRACEPOINTS
help
Place an empty function call at each marker site. Can be
dynamically changed for a probe function.
@@ -830,10 +876,6 @@ config RT_MUTEXES
boolean
select PLIST
-config TINY_SHMEM
- default !SHMEM
- bool
-
config BASE_SMALL
int
default 0 if BASE_FULL
@@ -908,14 +950,17 @@ config MODULE_SRCVERSION_ALL
the version). With this option, such a "srcversion" field
will be created for all modules. If unsure, say N.
-config KMOD
- def_bool y
- help
- This is being removed soon. These days, CONFIG_MODULES
- implies CONFIG_KMOD, so use that instead.
-
endif # MODULES
+config INIT_ALL_POSSIBLE
+ bool
+ help
+ Back when each arch used to define their own cpu_online_map and
+ cpu_possible_map, some of them chose to initialize cpu_possible_map
+ with all 1s, and others with all 0s. When they were centralised,
+ it was better to provide this option than to break all the archs
+ and have several arch maintainers persuing me down dark alleys.
+
config STOP_MACHINE
bool
default y
@@ -928,10 +973,90 @@ source "block/Kconfig"
config PREEMPT_NOTIFIERS
bool
+choice
+ prompt "RCU Implementation"
+ default CLASSIC_RCU
+
config CLASSIC_RCU
- def_bool !PREEMPT_RCU
+ bool "Classic RCU"
help
This option selects the classic RCU implementation that is
designed for best read-side performance on non-realtime
- systems. Classic RCU is the default. Note that the
- PREEMPT_RCU symbol is used to select/deselect this option.
+ systems.
+
+ Select this option if you are unsure.
+
+config TREE_RCU
+ bool "Tree-based hierarchical RCU"
+ help
+ This option selects the RCU implementation that is
+ designed for very large SMP system with hundreds or
+ thousands of CPUs.
+
+config PREEMPT_RCU
+ bool "Preemptible RCU"
+ depends on PREEMPT
+ help
+ This option reduces the latency of the kernel by making certain
+ RCU sections preemptible. Normally RCU code is non-preemptible, if
+ this option is selected then read-only RCU sections become
+ preemptible. This helps latency, but may expose bugs due to
+ now-naive assumptions about each RCU read-side critical section
+ remaining on a given CPU through its execution.
+
+endchoice
+
+config RCU_TRACE
+ bool "Enable tracing for RCU"
+ depends on TREE_RCU || PREEMPT_RCU
+ help
+ This option provides tracing in RCU which presents stats
+ in debugfs for debugging RCU implementation.
+
+ Say Y here if you want to enable RCU tracing
+ Say N if you are unsure.
+
+config RCU_FANOUT
+ int "Tree-based hierarchical RCU fanout value"
+ range 2 64 if 64BIT
+ range 2 32 if !64BIT
+ depends on TREE_RCU
+ default 64 if 64BIT
+ default 32 if !64BIT
+ help
+ This option controls the fanout of hierarchical implementations
+ of RCU, allowing RCU to work efficiently on machines with
+ large numbers of CPUs. This value must be at least the cube
+ root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit
+ systems and up to 262,144 for 64-bit systems.
+
+ Select a specific number if testing RCU itself.
+ Take the default if unsure.
+
+config RCU_FANOUT_EXACT
+ bool "Disable tree-based hierarchical RCU auto-balancing"
+ depends on TREE_RCU
+ default n
+ help
+ This option forces use of the exact RCU_FANOUT value specified,
+ regardless of imbalances in the hierarchy. This is useful for
+ testing RCU itself, and might one day be useful on systems with
+ strong NUMA behavior.
+
+ Without RCU_FANOUT_EXACT, the code will balance the hierarchy.
+
+ Say N if unsure.
+
+config TREE_RCU_TRACE
+ def_bool RCU_TRACE && TREE_RCU
+ select DEBUG_FS
+ help
+ This option provides tracing for the TREE_RCU implementation,
+ permitting Makefile to trivially select kernel/rcutree_trace.c.
+
+config PREEMPT_RCU_TRACE
+ def_bool RCU_TRACE && PREEMPT_RCU
+ select DEBUG_FS
+ help
+ This option provides tracing for the PREEMPT_RCU implementation,
+ permitting Makefile to trivially select kernel/rcupreempt_trace.c.
diff --git a/init/do_mounts.c b/init/do_mounts.c
index d055b19..708105e 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -13,6 +13,7 @@
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/initrd.h>
+#include <linux/async.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
@@ -220,10 +221,10 @@ static int __init do_mount_root(char *name, char *fs, int flags, void *data)
sys_chdir("/root");
ROOT_DEV = current->fs->pwd.mnt->mnt_sb->s_dev;
- printk("VFS: Mounted root (%s filesystem)%s.\n",
+ printk("VFS: Mounted root (%s filesystem)%s on device %u:%u.\n",
current->fs->pwd.mnt->mnt_sb->s_type->name,
current->fs->pwd.mnt->mnt_sb->s_flags & MS_RDONLY ?
- " readonly" : "");
+ " readonly" : "", MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
return 0;
}
@@ -372,6 +373,7 @@ void __init prepare_namespace(void)
/* wait for the known devices to complete their probing */
while (driver_probe_done() != 0)
msleep(100);
+ async_synchronize_full();
md_run_setup();
diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c
index d6da5cd..ff95e31 100644
--- a/init/do_mounts_md.c
+++ b/init/do_mounts_md.c
@@ -271,7 +271,7 @@ static int __init raid_setup(char *str)
__setup("raid=", raid_setup);
__setup("md=", md_setup);
-static void autodetect_raid(void)
+static void __init autodetect_raid(void)
{
int fd;
diff --git a/init/main.c b/init/main.c
index 7e117a2..8442094 100644
--- a/init/main.c
+++ b/init/main.c
@@ -50,7 +50,6 @@
#include <linux/rmap.h>
#include <linux/mempolicy.h>
#include <linux/key.h>
-#include <linux/unwind.h>
#include <linux/buffer_head.h>
#include <linux/page_cgroup.h>
#include <linux/debug_locks.h>
@@ -63,6 +62,8 @@
#include <linux/signal.h>
#include <linux/idr.h>
#include <linux/ftrace.h>
+#include <linux/async.h>
+#include <trace/boot.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -74,15 +75,6 @@
#include <asm/smp.h>
#endif
-/*
- * This is one of the first .c files built. Error out early if we have compiler
- * trouble.
- */
-
-#if __GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ == 0
-#warning gcc-4.1.0 is known to miscompile the kernel. A different compiler version is recommended.
-#endif
-
static int kernel_init(void *);
extern void init_IRQ(void);
@@ -116,7 +108,7 @@ EXPORT_SYMBOL(system_state);
extern void time_init(void);
/* Default late time init is NULL. archs can override this later. */
-void (*late_time_init)(void);
+void (*__initdata late_time_init)(void);
extern void softirq_init(void);
/* Untouched command line saved by arch-specific code. */
@@ -379,12 +371,7 @@ EXPORT_SYMBOL(nr_cpu_ids);
/* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
static void __init setup_nr_cpu_ids(void)
{
- int cpu, highest_cpu = 0;
-
- for_each_possible_cpu(cpu)
- highest_cpu = cpu;
-
- nr_cpu_ids = highest_cpu + 1;
+ nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
}
#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
@@ -460,7 +447,7 @@ static void __init setup_command_line(char *command_line)
* gcc-3.4 accidentally inlines this function, so use noinline.
*/
-static void noinline __init_refok rest_init(void)
+static noinline void __init_refok rest_init(void)
__releases(kernel_lock)
{
int pid;
@@ -526,9 +513,9 @@ static void __init boot_cpu_init(void)
{
int cpu = smp_processor_id();
/* Mark the boot cpu "present", "online" etc for SMP and UP case */
- cpu_set(cpu, cpu_online_map);
- cpu_set(cpu, cpu_present_map);
- cpu_set(cpu, cpu_possible_map);
+ set_cpu_online(cpu, true);
+ set_cpu_present(cpu, true);
+ set_cpu_possible(cpu, true);
}
void __init __weak smp_setup_processor_id(void)
@@ -550,7 +537,6 @@ asmlinkage void __init start_kernel(void)
* Need to run as early as possible, to initialize the
* lockdep hash:
*/
- unwind_init();
lockdep_init();
debug_objects_early_init();
cgroup_init_early();
@@ -572,7 +558,6 @@ asmlinkage void __init start_kernel(void)
setup_arch(&command_line);
mm_init_owner(&init_mm, &init_task);
setup_command_line(command_line);
- unwind_setup();
setup_per_cpu_areas();
setup_nr_cpu_ids();
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
@@ -603,6 +588,8 @@ asmlinkage void __init start_kernel(void)
sort_main_extable();
trap_init();
rcu_init();
+ /* init some links before init_ISA_irqs() */
+ early_irq_init();
init_IRQ();
pidhash_init();
init_timers();
@@ -613,7 +600,8 @@ asmlinkage void __init start_kernel(void)
sched_clock_init();
profile_init();
if (!irqs_disabled())
- printk("start_kernel(): bug: interrupts were enabled early\n");
+ printk(KERN_CRIT "start_kernel(): bug: interrupts were "
+ "enabled early\n");
early_boot_irqs_on();
local_irq_enable();
@@ -669,6 +657,7 @@ asmlinkage void __init start_kernel(void)
efi_enter_virtual_mode();
#endif
thread_info_cache_init();
+ cred_init();
fork_init(num_physpages);
proc_caches_init();
buffer_init();
@@ -697,37 +686,41 @@ asmlinkage void __init start_kernel(void)
rest_init();
}
-static int initcall_debug;
+int initcall_debug;
core_param(initcall_debug, initcall_debug, bool, 0644);
int do_one_initcall(initcall_t fn)
{
int count = preempt_count();
- ktime_t delta;
+ ktime_t calltime, delta, rettime;
char msgbuf[64];
- struct boot_trace it;
+ struct boot_trace_call call;
+ struct boot_trace_ret ret;
if (initcall_debug) {
- it.caller = task_pid_nr(current);
- printk("calling %pF @ %i\n", fn, it.caller);
- it.calltime = ktime_get();
+ call.caller = task_pid_nr(current);
+ printk("calling %pF @ %i\n", fn, call.caller);
+ calltime = ktime_get();
+ trace_boot_call(&call, fn);
+ enable_boot_trace();
}
- it.result = fn();
+ ret.result = fn();
if (initcall_debug) {
- it.rettime = ktime_get();
- delta = ktime_sub(it.rettime, it.calltime);
- it.duration = (unsigned long long) delta.tv64 >> 10;
+ disable_boot_trace();
+ rettime = ktime_get();
+ delta = ktime_sub(rettime, calltime);
+ ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10;
+ trace_boot_ret(&ret, fn);
printk("initcall %pF returned %d after %Ld usecs\n", fn,
- it.result, it.duration);
- trace_boot(&it, fn);
+ ret.result, ret.duration);
}
msgbuf[0] = 0;
- if (it.result && it.result != -ENODEV && initcall_debug)
- sprintf(msgbuf, "error code %d ", it.result);
+ if (ret.result && ret.result != -ENODEV && initcall_debug)
+ sprintf(msgbuf, "error code %d ", ret.result);
if (preempt_count() != count) {
strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -741,7 +734,7 @@ int do_one_initcall(initcall_t fn)
printk("initcall %pF returned with %s\n", fn, msgbuf);
}
- return it.result;
+ return ret.result;
}
@@ -792,8 +785,10 @@ static void run_init_process(char *init_filename)
/* This is a non __init function. Force it to be noinline otherwise gcc
* makes it inline to init() and it becomes part of init.text section
*/
-static int noinline init_post(void)
+static noinline int init_post(void)
{
+ /* need to finish all async __init code before freeing the memory */
+ async_synchronize_full();
free_initmem();
unlock_kernel();
mark_rodata_ro();
@@ -882,7 +877,7 @@ static int __init kernel_init(void * unused)
* we're essentially up and running. Get rid of the
* initmem segments and start the user-mode stuff..
*/
- stop_boot_trace();
+
init_post();
return 0;
}
OpenPOWER on IntegriCloud