diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-12-31 08:31:57 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-12-31 08:31:57 +0100 |
commit | a9de18eb761f7c1c860964b2e5addc1a35c7e861 (patch) | |
tree | 886e75fdfd09690cd262ca69cb7f5d1d42b48602 /init | |
parent | b2aaf8f74cdc84a9182f6cabf198b7763bcb9d40 (diff) | |
parent | 6a94cb73064c952255336cc57731904174b2c58f (diff) | |
download | lwn-a9de18eb761f7c1c860964b2e5addc1a35c7e861.tar.gz lwn-a9de18eb761f7c1c860964b2e5addc1a35c7e861.zip |
Merge branch 'linus' into stackprotector
Conflicts:
arch/x86/include/asm/pda.h
kernel/fork.c
Diffstat (limited to 'init')
-rw-r--r-- | init/Kconfig | 149 | ||||
-rw-r--r-- | init/do_mounts_md.c | 2 | ||||
-rw-r--r-- | init/do_mounts_rd.c | 2 | ||||
-rw-r--r-- | init/initramfs.c | 53 | ||||
-rw-r--r-- | init/main.c | 65 |
5 files changed, 233 insertions, 38 deletions
diff --git a/init/Kconfig b/init/Kconfig index 8a8e2d00c40e..13627191a60d 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -299,6 +299,13 @@ config CGROUP_NS for instance virtual servers and checkpoint/restart jobs. +config CGROUP_FREEZER + bool "control group freezer subsystem" + depends on CGROUPS + help + Provides a way to freeze and unfreeze all tasks in a + cgroup. + config CGROUP_DEVICE bool "Device controller for cgroups" depends on CGROUPS && EXPERIMENTAL @@ -347,7 +354,7 @@ config RT_GROUP_SCHED setting below. If enabled, it will also make it impossible to schedule realtime tasks for non-root users until you allocate realtime bandwidth for them. - See Documentation/sched-rt-group.txt for more information. + See Documentation/scheduler/sched-rt-group.txt for more information. choice depends on GROUP_SCHED @@ -394,16 +401,20 @@ config CGROUP_MEM_RES_CTLR depends on CGROUPS && RESOURCE_COUNTERS select MM_OWNER help - Provides a memory resource controller that manages both page cache and - RSS memory. + Provides a memory resource controller that manages both anonymous + memory and page cache. (See Documentation/controllers/memory.txt) Note that setting this option increases fixed memory overhead - associated with each page of memory in the system by 4/8 bytes - and also increases cache misses because struct page on many 64bit - systems will not fit into a single cache line anymore. + associated with each page of memory in the system. By this, + 20(40)bytes/PAGE_SIZE on 32(64)bit system will be occupied by memory + usage tracking struct at boot. Total amount of this is printed out + at boot. Only enable when you're ok with these trade offs and really - sure you need the memory resource controller. + sure you need the memory resource controller. Even when you enable + this, you can set "cgroup_disable=memory" at your boot option to + disable memory resource controller and you can avoid overheads. + (and lose benefits of memory resource contoller) This config option also selects MM_OWNER config option, which could in turn add some fork/exit overhead. @@ -577,6 +588,13 @@ config KALLSYMS_ALL Say N. +config KALLSYMS_STRIP_GENERATED + bool "Strip machine generated symbols from kallsyms" + depends on KALLSYMS_ALL + default y + help + Say N if you want kallsyms to retain even machine generated symbols. + config KALLSYMS_EXTRA_PASS bool "Do an extra kallsyms pass" depends on KALLSYMS @@ -713,6 +731,14 @@ config SHMEM option replaces shmem and tmpfs with the much simpler ramfs code, which may be appropriate on small systems without swap. +config AIO + bool "Enable AIO support" if EMBEDDED + default y + help + This option enables POSIX asynchronous I/O which may by used + by some high performance threaded applications. Disabling + this option saves about 7k. + config VM_EVENT_COUNTERS default y bool "Enable VM event counters for /proc/vmstat" if EMBEDDED @@ -722,6 +748,15 @@ config VM_EVENT_COUNTERS on EMBEDDED systems. /proc/vmstat will only show page counts if VM event counters are disabled. +config PCI_QUIRKS + default y + bool "Enable PCI quirk workarounds" if EMBEDDED + depends on PCI + help + This enables workarounds for various PCI chipset + bugs/quirks. Disable this only if your target machine is + unaffected by PCI quirks. + config SLUB_DEBUG default y bool "Enable SLUB debugging support" if EMBEDDED @@ -743,8 +778,7 @@ config SLAB help The regular slab allocator that is established and known to work well in all environments. It organizes cache hot objects in - per cpu and per node queues. SLAB is the default choice for - a slab allocator. + per cpu and per node queues. config SLUB bool "SLUB (Unqueued Allocator)" @@ -753,7 +787,8 @@ config SLUB instead of managing queues of cached objects (SLAB approach). Per cpu caching is realized using slabs of objects instead of queues of objects. SLUB can use memory efficiently - and has enhanced diagnostics. + and has enhanced diagnostics. SLUB is the default choice for + a slab allocator. config SLOB depends on EMBEDDED @@ -771,8 +806,16 @@ config PROFILING Say Y here to enable the extended profiling support mechanisms used by profilers such as OProfile. +# +# Place an empty function call at each tracepoint site. Can be +# dynamically changed for a probe function. +# +config TRACEPOINTS + bool + config MARKERS bool "Activate markers" + depends on TRACEPOINTS help Place an empty function call at each marker site. Can be dynamically changed for a probe function. @@ -893,10 +936,90 @@ source "block/Kconfig" config PREEMPT_NOTIFIERS bool +choice + prompt "RCU Implementation" + default CLASSIC_RCU + config CLASSIC_RCU - def_bool !PREEMPT_RCU + bool "Classic RCU" help This option selects the classic RCU implementation that is designed for best read-side performance on non-realtime - systems. Classic RCU is the default. Note that the - PREEMPT_RCU symbol is used to select/deselect this option. + systems. + + Select this option if you are unsure. + +config TREE_RCU + bool "Tree-based hierarchical RCU" + help + This option selects the RCU implementation that is + designed for very large SMP system with hundreds or + thousands of CPUs. + +config PREEMPT_RCU + bool "Preemptible RCU" + depends on PREEMPT + help + This option reduces the latency of the kernel by making certain + RCU sections preemptible. Normally RCU code is non-preemptible, if + this option is selected then read-only RCU sections become + preemptible. This helps latency, but may expose bugs due to + now-naive assumptions about each RCU read-side critical section + remaining on a given CPU through its execution. + +endchoice + +config RCU_TRACE + bool "Enable tracing for RCU" + depends on TREE_RCU || PREEMPT_RCU + help + This option provides tracing in RCU which presents stats + in debugfs for debugging RCU implementation. + + Say Y here if you want to enable RCU tracing + Say N if you are unsure. + +config RCU_FANOUT + int "Tree-based hierarchical RCU fanout value" + range 2 64 if 64BIT + range 2 32 if !64BIT + depends on TREE_RCU + default 64 if 64BIT + default 32 if !64BIT + help + This option controls the fanout of hierarchical implementations + of RCU, allowing RCU to work efficiently on machines with + large numbers of CPUs. This value must be at least the cube + root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit + systems and up to 262,144 for 64-bit systems. + + Select a specific number if testing RCU itself. + Take the default if unsure. + +config RCU_FANOUT_EXACT + bool "Disable tree-based hierarchical RCU auto-balancing" + depends on TREE_RCU + default n + help + This option forces use of the exact RCU_FANOUT value specified, + regardless of imbalances in the hierarchy. This is useful for + testing RCU itself, and might one day be useful on systems with + strong NUMA behavior. + + Without RCU_FANOUT_EXACT, the code will balance the hierarchy. + + Say N if unsure. + +config TREE_RCU_TRACE + def_bool RCU_TRACE && TREE_RCU + select DEBUG_FS + help + This option provides tracing for the TREE_RCU implementation, + permitting Makefile to trivially select kernel/rcutree_trace.c. + +config PREEMPT_RCU_TRACE + def_bool RCU_TRACE && PREEMPT_RCU + select DEBUG_FS + help + This option provides tracing for the PREEMPT_RCU implementation, + permitting Makefile to trivially select kernel/rcupreempt_trace.c. diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c index 48b3fadd83ed..d6da5cdd3c38 100644 --- a/init/do_mounts_md.c +++ b/init/do_mounts_md.c @@ -1,4 +1,4 @@ - +#include <linux/delay.h> #include <linux/raid/md.h> #include "do_mounts.h" diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index fedef93b586f..a7c748fa977a 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -71,7 +71,7 @@ identify_ramdisk_image(int fd, int start_block) sys_read(fd, buf, size); /* - * If it matches the gzip magic numbers, return -1 + * If it matches the gzip magic numbers, return 0 */ if (buf[0] == 037 && ((buf[1] == 0213) || (buf[1] == 0236))) { printk(KERN_NOTICE diff --git a/init/initramfs.c b/init/initramfs.c index 644fc01ad5f0..4f5ba75aaa7c 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -6,6 +6,7 @@ #include <linux/delay.h> #include <linux/string.h> #include <linux/syscalls.h> +#include <linux/utime.h> static __initdata char *message; static void __init error(char *x) @@ -72,6 +73,49 @@ static void __init free_hash(void) } } +static long __init do_utime(char __user *filename, time_t mtime) +{ + struct timespec t[2]; + + t[0].tv_sec = mtime; + t[0].tv_nsec = 0; + t[1].tv_sec = mtime; + t[1].tv_nsec = 0; + + return do_utimes(AT_FDCWD, filename, t, AT_SYMLINK_NOFOLLOW); +} + +static __initdata LIST_HEAD(dir_list); +struct dir_entry { + struct list_head list; + char *name; + time_t mtime; +}; + +static void __init dir_add(const char *name, time_t mtime) +{ + struct dir_entry *de = kmalloc(sizeof(struct dir_entry), GFP_KERNEL); + if (!de) + panic("can't allocate dir_entry buffer"); + INIT_LIST_HEAD(&de->list); + de->name = kstrdup(name, GFP_KERNEL); + de->mtime = mtime; + list_add(&de->list, &dir_list); +} + +static void __init dir_utime(void) +{ + struct dir_entry *de, *tmp; + list_for_each_entry_safe(de, tmp, &dir_list, list) { + list_del(&de->list); + do_utime(de->name, de->mtime); + kfree(de->name); + kfree(de); + } +} + +static __initdata time_t mtime; + /* cpio header parsing */ static __initdata unsigned long ino, major, minor, nlink; @@ -97,6 +141,7 @@ static void __init parse_header(char *s) uid = parsed[2]; gid = parsed[3]; nlink = parsed[4]; + mtime = parsed[5]; body_len = parsed[6]; major = parsed[7]; minor = parsed[8]; @@ -130,6 +175,7 @@ static inline void __init eat(unsigned n) count -= n; } +static __initdata char *vcollected; static __initdata char *collected; static __initdata int remains; static __initdata char *collect; @@ -271,6 +317,7 @@ static int __init do_name(void) if (wfd >= 0) { sys_fchown(wfd, uid, gid); sys_fchmod(wfd, mode); + vcollected = kstrdup(collected, GFP_KERNEL); state = CopyFile; } } @@ -278,12 +325,14 @@ static int __init do_name(void) sys_mkdir(collected, mode); sys_chown(collected, uid, gid); sys_chmod(collected, mode); + dir_add(collected, mtime); } else if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) { if (maybe_link() == 0) { sys_mknod(collected, mode, rdev); sys_chown(collected, uid, gid); sys_chmod(collected, mode); + do_utime(collected, mtime); } } return 0; @@ -294,6 +343,8 @@ static int __init do_copy(void) if (count >= body_len) { sys_write(wfd, victim, body_len); sys_close(wfd); + do_utime(vcollected, mtime); + kfree(vcollected); eat(body_len); state = SkipIt; return 0; @@ -311,6 +362,7 @@ static int __init do_symlink(void) clean_path(collected, 0); sys_symlink(collected + N_ALIGN(name_len), collected); sys_lchown(collected, uid, gid); + do_utime(collected, mtime); state = SkipIt; next_state = Reset; return 0; @@ -466,6 +518,7 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) buf += inptr; len -= inptr; } + dir_utime(); kfree(window); kfree(name_buf); kfree(symlink_buf); diff --git a/init/main.c b/init/main.c index 6aaff34a38c0..07da4dea50c3 100644 --- a/init/main.c +++ b/init/main.c @@ -28,6 +28,7 @@ #include <linux/gfp.h> #include <linux/percpu.h> #include <linux/kmod.h> +#include <linux/vmalloc.h> #include <linux/kernel_stat.h> #include <linux/start_kernel.h> #include <linux/security.h> @@ -52,6 +53,7 @@ #include <linux/key.h> #include <linux/unwind.h> #include <linux/buffer_head.h> +#include <linux/page_cgroup.h> #include <linux/debug_locks.h> #include <linux/debugobjects.h> #include <linux/lockdep.h> @@ -61,6 +63,8 @@ #include <linux/sched.h> #include <linux/signal.h> #include <linux/idr.h> +#include <linux/ftrace.h> +#include <trace/boot.h> #include <asm/io.h> #include <asm/bugs.h> @@ -537,6 +541,15 @@ void __init __weak thread_info_cache_init(void) { } +void __init __weak arch_early_irq_init(void) +{ +} + +void __init __weak early_irq_init(void) +{ + arch_early_irq_init(); +} + asmlinkage void __init start_kernel(void) { char * command_line; @@ -607,6 +620,8 @@ asmlinkage void __init start_kernel(void) sort_main_extable(); trap_init(); rcu_init(); + /* init some links before init_ISA_irqs() */ + early_irq_init(); init_IRQ(); pidhash_init(); init_timers(); @@ -649,8 +664,10 @@ asmlinkage void __init start_kernel(void) initrd_start = 0; } #endif + vmalloc_init(); vfs_caches_init_early(); cpuset_init_early(); + page_cgroup_init(); mem_init(); enable_debug_pagealloc(); cpu_hotplug_init(); @@ -671,10 +688,10 @@ asmlinkage void __init start_kernel(void) efi_enter_virtual_mode(); #endif thread_info_cache_init(); + cred_init(); fork_init(num_physpages); proc_caches_init(); buffer_init(); - unnamed_dev_init(); key_init(); security_init(); vfs_caches_init(num_physpages); @@ -694,46 +711,47 @@ asmlinkage void __init start_kernel(void) acpi_early_init(); /* before LAPIC and SMP init */ + ftrace_init(); + /* Do the rest non-__init'ed, we're now alive */ rest_init(); } static int initcall_debug; - -static int __init initcall_debug_setup(char *str) -{ - initcall_debug = 1; - return 1; -} -__setup("initcall_debug", initcall_debug_setup); +core_param(initcall_debug, initcall_debug, bool, 0644); int do_one_initcall(initcall_t fn) { int count = preempt_count(); - ktime_t t0, t1, delta; + ktime_t calltime, delta, rettime; char msgbuf[64]; - int result; + struct boot_trace_call call; + struct boot_trace_ret ret; if (initcall_debug) { - printk("calling %pF @ %i\n", fn, task_pid_nr(current)); - t0 = ktime_get(); + call.caller = task_pid_nr(current); + printk("calling %pF @ %i\n", fn, call.caller); + calltime = ktime_get(); + trace_boot_call(&call, fn); + enable_boot_trace(); } - result = fn(); + ret.result = fn(); if (initcall_debug) { - t1 = ktime_get(); - delta = ktime_sub(t1, t0); - - printk("initcall %pF returned %d after %Ld msecs\n", - fn, result, - (unsigned long long) delta.tv64 >> 20); + disable_boot_trace(); + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10; + trace_boot_ret(&ret, fn); + printk("initcall %pF returned %d after %Ld usecs\n", fn, + ret.result, ret.duration); } msgbuf[0] = 0; - if (result && result != -ENODEV && initcall_debug) - sprintf(msgbuf, "error code %d ", result); + if (ret.result && ret.result != -ENODEV && initcall_debug) + sprintf(msgbuf, "error code %d ", ret.result); if (preempt_count() != count) { strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf)); @@ -747,7 +765,7 @@ int do_one_initcall(initcall_t fn) printk("initcall %pF returned with %s\n", fn, msgbuf); } - return result; + return ret.result; } @@ -774,7 +792,6 @@ static void __init do_initcalls(void) static void __init do_basic_setup(void) { rcu_init_sched(); /* needed by module_init stage. */ - /* drivers will send hotplug events */ init_workqueues(); usermodehelper_init(); driver_init(); @@ -862,6 +879,7 @@ static int __init kernel_init(void * unused) smp_prepare_cpus(setup_max_cpus); do_pre_smp_initcalls(); + start_boot_trace(); smp_init(); sched_init_smp(); @@ -888,6 +906,7 @@ static int __init kernel_init(void * unused) * we're essentially up and running. Get rid of the * initmem segments and start the user-mode stuff.. */ + init_post(); return 0; } |