summaryrefslogtreecommitdiff
path: root/init
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-12-31 08:31:57 +0100
committerIngo Molnar <mingo@elte.hu>2008-12-31 08:31:57 +0100
commita9de18eb761f7c1c860964b2e5addc1a35c7e861 (patch)
tree886e75fdfd09690cd262ca69cb7f5d1d42b48602 /init
parentb2aaf8f74cdc84a9182f6cabf198b7763bcb9d40 (diff)
parent6a94cb73064c952255336cc57731904174b2c58f (diff)
downloadlwn-a9de18eb761f7c1c860964b2e5addc1a35c7e861.tar.gz
lwn-a9de18eb761f7c1c860964b2e5addc1a35c7e861.zip
Merge branch 'linus' into stackprotector
Conflicts: arch/x86/include/asm/pda.h kernel/fork.c
Diffstat (limited to 'init')
-rw-r--r--init/Kconfig149
-rw-r--r--init/do_mounts_md.c2
-rw-r--r--init/do_mounts_rd.c2
-rw-r--r--init/initramfs.c53
-rw-r--r--init/main.c65
5 files changed, 233 insertions, 38 deletions
diff --git a/init/Kconfig b/init/Kconfig
index 8a8e2d00c40e..13627191a60d 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -299,6 +299,13 @@ config CGROUP_NS
for instance virtual servers and checkpoint/restart
jobs.
+config CGROUP_FREEZER
+ bool "control group freezer subsystem"
+ depends on CGROUPS
+ help
+ Provides a way to freeze and unfreeze all tasks in a
+ cgroup.
+
config CGROUP_DEVICE
bool "Device controller for cgroups"
depends on CGROUPS && EXPERIMENTAL
@@ -347,7 +354,7 @@ config RT_GROUP_SCHED
setting below. If enabled, it will also make it impossible to
schedule realtime tasks for non-root users until you allocate
realtime bandwidth for them.
- See Documentation/sched-rt-group.txt for more information.
+ See Documentation/scheduler/sched-rt-group.txt for more information.
choice
depends on GROUP_SCHED
@@ -394,16 +401,20 @@ config CGROUP_MEM_RES_CTLR
depends on CGROUPS && RESOURCE_COUNTERS
select MM_OWNER
help
- Provides a memory resource controller that manages both page cache and
- RSS memory.
+ Provides a memory resource controller that manages both anonymous
+ memory and page cache. (See Documentation/controllers/memory.txt)
Note that setting this option increases fixed memory overhead
- associated with each page of memory in the system by 4/8 bytes
- and also increases cache misses because struct page on many 64bit
- systems will not fit into a single cache line anymore.
+ associated with each page of memory in the system. By this,
+ 20(40)bytes/PAGE_SIZE on 32(64)bit system will be occupied by memory
+ usage tracking struct at boot. Total amount of this is printed out
+ at boot.
Only enable when you're ok with these trade offs and really
- sure you need the memory resource controller.
+ sure you need the memory resource controller. Even when you enable
+ this, you can set "cgroup_disable=memory" at your boot option to
+ disable memory resource controller and you can avoid overheads.
+ (and lose benefits of memory resource contoller)
This config option also selects MM_OWNER config option, which
could in turn add some fork/exit overhead.
@@ -577,6 +588,13 @@ config KALLSYMS_ALL
Say N.
+config KALLSYMS_STRIP_GENERATED
+ bool "Strip machine generated symbols from kallsyms"
+ depends on KALLSYMS_ALL
+ default y
+ help
+ Say N if you want kallsyms to retain even machine generated symbols.
+
config KALLSYMS_EXTRA_PASS
bool "Do an extra kallsyms pass"
depends on KALLSYMS
@@ -713,6 +731,14 @@ config SHMEM
option replaces shmem and tmpfs with the much simpler ramfs code,
which may be appropriate on small systems without swap.
+config AIO
+ bool "Enable AIO support" if EMBEDDED
+ default y
+ help
+ This option enables POSIX asynchronous I/O which may by used
+ by some high performance threaded applications. Disabling
+ this option saves about 7k.
+
config VM_EVENT_COUNTERS
default y
bool "Enable VM event counters for /proc/vmstat" if EMBEDDED
@@ -722,6 +748,15 @@ config VM_EVENT_COUNTERS
on EMBEDDED systems. /proc/vmstat will only show page counts
if VM event counters are disabled.
+config PCI_QUIRKS
+ default y
+ bool "Enable PCI quirk workarounds" if EMBEDDED
+ depends on PCI
+ help
+ This enables workarounds for various PCI chipset
+ bugs/quirks. Disable this only if your target machine is
+ unaffected by PCI quirks.
+
config SLUB_DEBUG
default y
bool "Enable SLUB debugging support" if EMBEDDED
@@ -743,8 +778,7 @@ config SLAB
help
The regular slab allocator that is established and known to work
well in all environments. It organizes cache hot objects in
- per cpu and per node queues. SLAB is the default choice for
- a slab allocator.
+ per cpu and per node queues.
config SLUB
bool "SLUB (Unqueued Allocator)"
@@ -753,7 +787,8 @@ config SLUB
instead of managing queues of cached objects (SLAB approach).
Per cpu caching is realized using slabs of objects instead
of queues of objects. SLUB can use memory efficiently
- and has enhanced diagnostics.
+ and has enhanced diagnostics. SLUB is the default choice for
+ a slab allocator.
config SLOB
depends on EMBEDDED
@@ -771,8 +806,16 @@ config PROFILING
Say Y here to enable the extended profiling support mechanisms used
by profilers such as OProfile.
+#
+# Place an empty function call at each tracepoint site. Can be
+# dynamically changed for a probe function.
+#
+config TRACEPOINTS
+ bool
+
config MARKERS
bool "Activate markers"
+ depends on TRACEPOINTS
help
Place an empty function call at each marker site. Can be
dynamically changed for a probe function.
@@ -893,10 +936,90 @@ source "block/Kconfig"
config PREEMPT_NOTIFIERS
bool
+choice
+ prompt "RCU Implementation"
+ default CLASSIC_RCU
+
config CLASSIC_RCU
- def_bool !PREEMPT_RCU
+ bool "Classic RCU"
help
This option selects the classic RCU implementation that is
designed for best read-side performance on non-realtime
- systems. Classic RCU is the default. Note that the
- PREEMPT_RCU symbol is used to select/deselect this option.
+ systems.
+
+ Select this option if you are unsure.
+
+config TREE_RCU
+ bool "Tree-based hierarchical RCU"
+ help
+ This option selects the RCU implementation that is
+ designed for very large SMP system with hundreds or
+ thousands of CPUs.
+
+config PREEMPT_RCU
+ bool "Preemptible RCU"
+ depends on PREEMPT
+ help
+ This option reduces the latency of the kernel by making certain
+ RCU sections preemptible. Normally RCU code is non-preemptible, if
+ this option is selected then read-only RCU sections become
+ preemptible. This helps latency, but may expose bugs due to
+ now-naive assumptions about each RCU read-side critical section
+ remaining on a given CPU through its execution.
+
+endchoice
+
+config RCU_TRACE
+ bool "Enable tracing for RCU"
+ depends on TREE_RCU || PREEMPT_RCU
+ help
+ This option provides tracing in RCU which presents stats
+ in debugfs for debugging RCU implementation.
+
+ Say Y here if you want to enable RCU tracing
+ Say N if you are unsure.
+
+config RCU_FANOUT
+ int "Tree-based hierarchical RCU fanout value"
+ range 2 64 if 64BIT
+ range 2 32 if !64BIT
+ depends on TREE_RCU
+ default 64 if 64BIT
+ default 32 if !64BIT
+ help
+ This option controls the fanout of hierarchical implementations
+ of RCU, allowing RCU to work efficiently on machines with
+ large numbers of CPUs. This value must be at least the cube
+ root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit
+ systems and up to 262,144 for 64-bit systems.
+
+ Select a specific number if testing RCU itself.
+ Take the default if unsure.
+
+config RCU_FANOUT_EXACT
+ bool "Disable tree-based hierarchical RCU auto-balancing"
+ depends on TREE_RCU
+ default n
+ help
+ This option forces use of the exact RCU_FANOUT value specified,
+ regardless of imbalances in the hierarchy. This is useful for
+ testing RCU itself, and might one day be useful on systems with
+ strong NUMA behavior.
+
+ Without RCU_FANOUT_EXACT, the code will balance the hierarchy.
+
+ Say N if unsure.
+
+config TREE_RCU_TRACE
+ def_bool RCU_TRACE && TREE_RCU
+ select DEBUG_FS
+ help
+ This option provides tracing for the TREE_RCU implementation,
+ permitting Makefile to trivially select kernel/rcutree_trace.c.
+
+config PREEMPT_RCU_TRACE
+ def_bool RCU_TRACE && PREEMPT_RCU
+ select DEBUG_FS
+ help
+ This option provides tracing for the PREEMPT_RCU implementation,
+ permitting Makefile to trivially select kernel/rcupreempt_trace.c.
diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c
index 48b3fadd83ed..d6da5cdd3c38 100644
--- a/init/do_mounts_md.c
+++ b/init/do_mounts_md.c
@@ -1,4 +1,4 @@
-
+#include <linux/delay.h>
#include <linux/raid/md.h>
#include "do_mounts.h"
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index fedef93b586f..a7c748fa977a 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -71,7 +71,7 @@ identify_ramdisk_image(int fd, int start_block)
sys_read(fd, buf, size);
/*
- * If it matches the gzip magic numbers, return -1
+ * If it matches the gzip magic numbers, return 0
*/
if (buf[0] == 037 && ((buf[1] == 0213) || (buf[1] == 0236))) {
printk(KERN_NOTICE
diff --git a/init/initramfs.c b/init/initramfs.c
index 644fc01ad5f0..4f5ba75aaa7c 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -6,6 +6,7 @@
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/syscalls.h>
+#include <linux/utime.h>
static __initdata char *message;
static void __init error(char *x)
@@ -72,6 +73,49 @@ static void __init free_hash(void)
}
}
+static long __init do_utime(char __user *filename, time_t mtime)
+{
+ struct timespec t[2];
+
+ t[0].tv_sec = mtime;
+ t[0].tv_nsec = 0;
+ t[1].tv_sec = mtime;
+ t[1].tv_nsec = 0;
+
+ return do_utimes(AT_FDCWD, filename, t, AT_SYMLINK_NOFOLLOW);
+}
+
+static __initdata LIST_HEAD(dir_list);
+struct dir_entry {
+ struct list_head list;
+ char *name;
+ time_t mtime;
+};
+
+static void __init dir_add(const char *name, time_t mtime)
+{
+ struct dir_entry *de = kmalloc(sizeof(struct dir_entry), GFP_KERNEL);
+ if (!de)
+ panic("can't allocate dir_entry buffer");
+ INIT_LIST_HEAD(&de->list);
+ de->name = kstrdup(name, GFP_KERNEL);
+ de->mtime = mtime;
+ list_add(&de->list, &dir_list);
+}
+
+static void __init dir_utime(void)
+{
+ struct dir_entry *de, *tmp;
+ list_for_each_entry_safe(de, tmp, &dir_list, list) {
+ list_del(&de->list);
+ do_utime(de->name, de->mtime);
+ kfree(de->name);
+ kfree(de);
+ }
+}
+
+static __initdata time_t mtime;
+
/* cpio header parsing */
static __initdata unsigned long ino, major, minor, nlink;
@@ -97,6 +141,7 @@ static void __init parse_header(char *s)
uid = parsed[2];
gid = parsed[3];
nlink = parsed[4];
+ mtime = parsed[5];
body_len = parsed[6];
major = parsed[7];
minor = parsed[8];
@@ -130,6 +175,7 @@ static inline void __init eat(unsigned n)
count -= n;
}
+static __initdata char *vcollected;
static __initdata char *collected;
static __initdata int remains;
static __initdata char *collect;
@@ -271,6 +317,7 @@ static int __init do_name(void)
if (wfd >= 0) {
sys_fchown(wfd, uid, gid);
sys_fchmod(wfd, mode);
+ vcollected = kstrdup(collected, GFP_KERNEL);
state = CopyFile;
}
}
@@ -278,12 +325,14 @@ static int __init do_name(void)
sys_mkdir(collected, mode);
sys_chown(collected, uid, gid);
sys_chmod(collected, mode);
+ dir_add(collected, mtime);
} else if (S_ISBLK(mode) || S_ISCHR(mode) ||
S_ISFIFO(mode) || S_ISSOCK(mode)) {
if (maybe_link() == 0) {
sys_mknod(collected, mode, rdev);
sys_chown(collected, uid, gid);
sys_chmod(collected, mode);
+ do_utime(collected, mtime);
}
}
return 0;
@@ -294,6 +343,8 @@ static int __init do_copy(void)
if (count >= body_len) {
sys_write(wfd, victim, body_len);
sys_close(wfd);
+ do_utime(vcollected, mtime);
+ kfree(vcollected);
eat(body_len);
state = SkipIt;
return 0;
@@ -311,6 +362,7 @@ static int __init do_symlink(void)
clean_path(collected, 0);
sys_symlink(collected + N_ALIGN(name_len), collected);
sys_lchown(collected, uid, gid);
+ do_utime(collected, mtime);
state = SkipIt;
next_state = Reset;
return 0;
@@ -466,6 +518,7 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only)
buf += inptr;
len -= inptr;
}
+ dir_utime();
kfree(window);
kfree(name_buf);
kfree(symlink_buf);
diff --git a/init/main.c b/init/main.c
index 6aaff34a38c0..07da4dea50c3 100644
--- a/init/main.c
+++ b/init/main.c
@@ -28,6 +28,7 @@
#include <linux/gfp.h>
#include <linux/percpu.h>
#include <linux/kmod.h>
+#include <linux/vmalloc.h>
#include <linux/kernel_stat.h>
#include <linux/start_kernel.h>
#include <linux/security.h>
@@ -52,6 +53,7 @@
#include <linux/key.h>
#include <linux/unwind.h>
#include <linux/buffer_head.h>
+#include <linux/page_cgroup.h>
#include <linux/debug_locks.h>
#include <linux/debugobjects.h>
#include <linux/lockdep.h>
@@ -61,6 +63,8 @@
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/idr.h>
+#include <linux/ftrace.h>
+#include <trace/boot.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -537,6 +541,15 @@ void __init __weak thread_info_cache_init(void)
{
}
+void __init __weak arch_early_irq_init(void)
+{
+}
+
+void __init __weak early_irq_init(void)
+{
+ arch_early_irq_init();
+}
+
asmlinkage void __init start_kernel(void)
{
char * command_line;
@@ -607,6 +620,8 @@ asmlinkage void __init start_kernel(void)
sort_main_extable();
trap_init();
rcu_init();
+ /* init some links before init_ISA_irqs() */
+ early_irq_init();
init_IRQ();
pidhash_init();
init_timers();
@@ -649,8 +664,10 @@ asmlinkage void __init start_kernel(void)
initrd_start = 0;
}
#endif
+ vmalloc_init();
vfs_caches_init_early();
cpuset_init_early();
+ page_cgroup_init();
mem_init();
enable_debug_pagealloc();
cpu_hotplug_init();
@@ -671,10 +688,10 @@ asmlinkage void __init start_kernel(void)
efi_enter_virtual_mode();
#endif
thread_info_cache_init();
+ cred_init();
fork_init(num_physpages);
proc_caches_init();
buffer_init();
- unnamed_dev_init();
key_init();
security_init();
vfs_caches_init(num_physpages);
@@ -694,46 +711,47 @@ asmlinkage void __init start_kernel(void)
acpi_early_init(); /* before LAPIC and SMP init */
+ ftrace_init();
+
/* Do the rest non-__init'ed, we're now alive */
rest_init();
}
static int initcall_debug;
-
-static int __init initcall_debug_setup(char *str)
-{
- initcall_debug = 1;
- return 1;
-}
-__setup("initcall_debug", initcall_debug_setup);
+core_param(initcall_debug, initcall_debug, bool, 0644);
int do_one_initcall(initcall_t fn)
{
int count = preempt_count();
- ktime_t t0, t1, delta;
+ ktime_t calltime, delta, rettime;
char msgbuf[64];
- int result;
+ struct boot_trace_call call;
+ struct boot_trace_ret ret;
if (initcall_debug) {
- printk("calling %pF @ %i\n", fn, task_pid_nr(current));
- t0 = ktime_get();
+ call.caller = task_pid_nr(current);
+ printk("calling %pF @ %i\n", fn, call.caller);
+ calltime = ktime_get();
+ trace_boot_call(&call, fn);
+ enable_boot_trace();
}
- result = fn();
+ ret.result = fn();
if (initcall_debug) {
- t1 = ktime_get();
- delta = ktime_sub(t1, t0);
-
- printk("initcall %pF returned %d after %Ld msecs\n",
- fn, result,
- (unsigned long long) delta.tv64 >> 20);
+ disable_boot_trace();
+ rettime = ktime_get();
+ delta = ktime_sub(rettime, calltime);
+ ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10;
+ trace_boot_ret(&ret, fn);
+ printk("initcall %pF returned %d after %Ld usecs\n", fn,
+ ret.result, ret.duration);
}
msgbuf[0] = 0;
- if (result && result != -ENODEV && initcall_debug)
- sprintf(msgbuf, "error code %d ", result);
+ if (ret.result && ret.result != -ENODEV && initcall_debug)
+ sprintf(msgbuf, "error code %d ", ret.result);
if (preempt_count() != count) {
strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -747,7 +765,7 @@ int do_one_initcall(initcall_t fn)
printk("initcall %pF returned with %s\n", fn, msgbuf);
}
- return result;
+ return ret.result;
}
@@ -774,7 +792,6 @@ static void __init do_initcalls(void)
static void __init do_basic_setup(void)
{
rcu_init_sched(); /* needed by module_init stage. */
- /* drivers will send hotplug events */
init_workqueues();
usermodehelper_init();
driver_init();
@@ -862,6 +879,7 @@ static int __init kernel_init(void * unused)
smp_prepare_cpus(setup_max_cpus);
do_pre_smp_initcalls();
+ start_boot_trace();
smp_init();
sched_init_smp();
@@ -888,6 +906,7 @@ static int __init kernel_init(void * unused)
* we're essentially up and running. Get rid of the
* initmem segments and start the user-mode stuff..
*/
+
init_post();
return 0;
}