From 775b64d2b6ca37697de925f70799c710aab5849a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 12 Jan 2008 20:40:46 +0100 Subject: PM: Acquire device locks on suspend This patch reorganizes the way suspend and resume notifications are sent to drivers. The major changes are that now the PM core acquires every device semaphore before calling the methods, and calls to device_add() during suspends will fail, while calls to device_del() during suspends will block. It also provides a way to safely remove a suspended device with the help of the PM core, by using the device_pm_schedule_removal() callback introduced specifically for this purpose, and updates two drivers (msr and cpuid) that need to use it. Signed-off-by: Alan Stern Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpuid.c | 6 +++--- arch/x86/kernel/msr.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 05c9936a16cc..d387c770c518 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -157,15 +157,15 @@ static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: err = cpuid_device_create(cpu); break; case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: - case CPU_DEAD_FROZEN: cpuid_device_destroy(cpu); break; + case CPU_UP_CANCELED_FROZEN: + destroy_suspended_device(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); + break; } return err ? NOTIFY_BAD : NOTIFY_OK; } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index ee6eba4ecfea..21f6e3c0be18 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -155,15 +155,15 @@ static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: err = msr_device_create(cpu); break; case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: - case CPU_DEAD_FROZEN: msr_device_destroy(cpu); break; + case CPU_UP_CANCELED_FROZEN: + destroy_suspended_device(msr_class, MKDEV(MSR_MAJOR, cpu)); + break; } return err ? NOTIFY_BAD : NOTIFY_OK; } -- cgit v1.2.3 From 5b3f355d8fef95901505e924818b3031092453c2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 17 Dec 2007 15:54:39 -0400 Subject: Kobject: change arch/x86/kernel/cpu/intel_cacheinfo.c to use kobject_init_and_add Stop using kobject_register, as this way we can control the sending of the uevent properly, after everything is properly initialized. 
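For reference, the conversion pattern applied throughout this series looks roughly like the sketch below (illustrative only; "example_ktype" and "parent" stand in for the real objects). A single kobject_register() call, which used to fire the uevent internally, is split into kobject_init_and_add(), which emits no uevent, followed by an explicit KOBJ_ADD once the attributes and child objects are in place:

	retval = kobject_init_and_add(kobj, &example_ktype, parent, "%s", "cache");
	if (retval)
		goto fail;
	/* ... populate attributes and child objects ... */
	kobject_uevent(kobj, KOBJ_ADD);
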
Cc: Ashok Raj Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel_cacheinfo.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 9f530ff43c21..3509542eed87 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -733,10 +733,8 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) if (unlikely(retval < 0)) return retval; - cache_kobject[cpu]->parent = &sys_dev->kobj; - kobject_set_name(cache_kobject[cpu], "%s", "cache"); - cache_kobject[cpu]->ktype = &ktype_percpu_entry; - retval = kobject_register(cache_kobject[cpu]); + retval = kobject_init_and_add(cache_kobject[cpu], &ktype_percpu_entry, + &sys_dev->kobj, "%s", "cache"); if (retval < 0) { cpuid4_cache_sysfs_exit(cpu); return retval; @@ -746,10 +744,9 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) this_object = INDEX_KOBJECT_PTR(cpu,i); this_object->cpu = cpu; this_object->index = i; - this_object->kobj.parent = cache_kobject[cpu]; - kobject_set_name(&(this_object->kobj), "index%1lu", i); - this_object->kobj.ktype = &ktype_cache; - retval = kobject_register(&(this_object->kobj)); + retval = kobject_init_and_add(&(this_object->kobj), + &ktype_cache, cache_kobject[cpu], + "index%1lu", i); if (unlikely(retval)) { for (j = 0; j < i; j++) { kobject_unregister( @@ -759,10 +756,12 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) cpuid4_cache_sysfs_exit(cpu); break; } + kobject_uevent(&(this_object->kobj), KOBJ_ADD); } if (!retval) cpu_set(cpu, cache_dev_map); + kobject_uevent(cache_kobject[cpu], KOBJ_ADD); return retval; } -- cgit v1.2.3 From a521cf209c6e7042f85b2c5b16da3ffa8004fb7b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 19 Dec 2007 09:23:20 -0800 Subject: Kobject: change arch/x86/kernel/cpu/mcheck/mce_amd_64.c to use kobject_create_and_add Make this kobject dynamic and convert it to not use kobject_register, which is going away. 
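The shape of this conversion, sketched with invented names rather than copied from the patch: the kobject embedded in struct threshold_bank becomes a pointer, and kobject_create_and_add() allocates, initializes, registers and announces the object in one call; teardown is then just a matter of dropping the reference, as the kobject_put() conversion later in this series does:

	struct kobject *kobj;

	kobj = kobject_create_and_add("example", parent); /* sends KOBJ_ADD itself */
	if (!kobj)
		return -ENOMEM;
	/* ... */
	kobject_put(kobj); /* final put frees the dynamic name and the kobject */
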
Cc: Jacob Shin Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 752fb16a817d..2d65311d206e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -65,7 +65,7 @@ static struct threshold_block threshold_defaults = { }; struct threshold_bank { - struct kobject kobj; + struct kobject *kobj; struct threshold_block *blocks; cpumask_t cpus; }; @@ -433,7 +433,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, per_cpu(threshold_banks, cpu)[bank]->blocks = b; kobject_set_name(&b->kobj, "misc%i", block); - b->kobj.parent = &per_cpu(threshold_banks, cpu)[bank]->kobj; + b->kobj.parent = per_cpu(threshold_banks, cpu)[bank]->kobj; b->kobj.ktype = &threshold_ktype; err = kobject_register(&b->kobj); if (err) @@ -489,7 +489,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) goto out; err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj, - &b->kobj, name); + b->kobj, name); if (err) goto out; @@ -505,16 +505,15 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) goto out; } - kobject_set_name(&b->kobj, "threshold_bank%i", bank); - b->kobj.parent = &per_cpu(device_mce, cpu).kobj; + b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); + if (!b->kobj) + goto out_free; + #ifndef CONFIG_SMP b->cpus = CPU_MASK_ALL; #else b->cpus = per_cpu(cpu_core_map, cpu); #endif - err = kobject_register(&b->kobj); - if (err) - goto out_free; per_cpu(threshold_banks, cpu)[bank] = b; @@ -531,7 +530,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) continue; err = sysfs_create_link(&per_cpu(device_mce, i).kobj, - &b->kobj, name); + b->kobj, name); if (err) goto out; @@ -627,7 +626,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) deallocate_threshold_block(cpu, bank); free_out: - kobject_unregister(&b->kobj); + kobject_unregister(b->kobj); kfree(b); per_cpu(threshold_banks, cpu)[bank] = NULL; } -- cgit v1.2.3 From 542eb75a27616bdde95c8d3764e0ab703579f8b5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 19 Dec 2007 09:23:20 -0800 Subject: Kobject: change arch/x86/kernel/cpu/mcheck/mce_amd_64.c to use kobject_init_and_add Stop using kobject_register, as this way we can control the sending of the uevent properly, after everything is properly initialized. 
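A related detail for kobjects embedded in larger structures, shown as a sketch of the general kobject rules rather than of this exact hunk: once kobject_init_and_add() has run, even the failure path must be unwound with kobject_put(), so that the dynamically allocated name is freed and the ktype's release() runs, and the KOBJ_ADD uevent is sent only after the whole sub-hierarchy exists:

	err = kobject_init_and_add(&b->kobj, &threshold_ktype, parent,
				   "misc%i", block);
	if (err) {
		kobject_put(&b->kobj); /* undoes the init, frees the name */
		return err;
	}
	/* ... allocate and link any sub-blocks ... */
	kobject_uevent(&b->kobj, KOBJ_ADD);
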
Cc: Jacob Shin Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 2d65311d206e..ef15f35b10ed 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -432,10 +432,9 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, else per_cpu(threshold_banks, cpu)[bank]->blocks = b; - kobject_set_name(&b->kobj, "misc%i", block); - b->kobj.parent = per_cpu(threshold_banks, cpu)[bank]->kobj; - b->kobj.ktype = &threshold_ktype; - err = kobject_register(&b->kobj); + err = kobject_init_and_add(&b->kobj, &threshold_ktype, + per_cpu(threshold_banks, cpu)[bank]->kobj, + "misc%i", block); if (err) goto out_free; recurse: @@ -451,6 +450,8 @@ recurse: if (err) goto out_free; + kobject_uevent(&b->kobj, KOBJ_ADD); + return err; out_free: -- cgit v1.2.3 From 38a382ae5dd4f4d04e3046816b0a41836094e538 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 20 Dec 2007 08:13:05 -0800 Subject: Kobject: convert arch/* from kobject_unregister() to kobject_put() There is no need for kobject_unregister() anymore, thanks to Kay's kobject cleanup changes, so replace all instances of it with kobject_put(). Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- arch/ia64/kernel/topology.c | 9 ++++----- arch/s390/hypfs/inode.c | 4 ++-- arch/sh/kernel/cpu/sh4/sq.c | 2 +- arch/x86/kernel/cpu/intel_cacheinfo.c | 9 ++++----- arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 6 +++--- 5 files changed, 14 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index c4311e3adf55..a2484fc1a06c 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -366,10 +366,9 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) "index%1lu", i); if (unlikely(retval)) { for (j = 0; j < i; j++) { - kobject_unregister( - &(LEAF_KOBJECT_PTR(cpu,j)->kobj)); + kobject_put(&(LEAF_KOBJECT_PTR(cpu,j)->kobj)); } - kobject_unregister(&all_cpu_cache_info[cpu].kobj); + kobject_put(&all_cpu_cache_info[cpu].kobj); cpu_cache_sysfs_exit(cpu); break; } @@ -386,10 +385,10 @@ static int __cpuinit cache_remove_dev(struct sys_device * sys_dev) unsigned long i; for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) - kobject_unregister(&(LEAF_KOBJECT_PTR(cpu,i)->kobj)); + kobject_put(&(LEAF_KOBJECT_PTR(cpu,i)->kobj)); if (all_cpu_cache_info[cpu].kobj.parent) { - kobject_unregister(&all_cpu_cache_info[cpu].kobj); + kobject_put(&all_cpu_cache_info[cpu].kobj); memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject)); diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 631a6109f642..4b010ff814c9 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -517,7 +517,7 @@ static int __init hypfs_init(void) return 0; fail_filesystem: - kobject_unregister(s390_kobj); + kobject_put(s390_kobj); fail_sysfs: if (!MACHINE_IS_VM) hypfs_diag_exit(); @@ -531,7 +531,7 @@ static void __exit hypfs_exit(void) if (!MACHINE_IS_VM) hypfs_diag_exit(); unregister_filesystem(&hypfs_type); - kobject_unregister(s390_kobj); + kobject_put(s390_kobj); } module_init(hypfs_init) diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c index 97fd9b9a4820..3008c00eea6b 100644 --- a/arch/sh/kernel/cpu/sh4/sq.c +++ b/arch/sh/kernel/cpu/sh4/sq.c @@ -360,7 
+360,7 @@ static int __devexit sq_sysdev_remove(struct sys_device *sysdev) unsigned int cpu = sysdev->id; struct kobject *kobj = sq_kobject[cpu]; - kobject_unregister(kobj); + kobject_put(kobj); return 0; } diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 3509542eed87..8b4507b8469b 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -749,10 +749,9 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) "index%1lu", i); if (unlikely(retval)) { for (j = 0; j < i; j++) { - kobject_unregister( - &(INDEX_KOBJECT_PTR(cpu,j)->kobj)); + kobject_put(&(INDEX_KOBJECT_PTR(cpu,j)->kobj)); } - kobject_unregister(cache_kobject[cpu]); + kobject_put(cache_kobject[cpu]); cpuid4_cache_sysfs_exit(cpu); break; } @@ -777,8 +776,8 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) cpu_clear(cpu, cache_dev_map); for (i = 0; i < num_cache_leaves; i++) - kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); - kobject_unregister(cache_kobject[cpu]); + kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); + kobject_put(cache_kobject[cpu]); cpuid4_cache_sysfs_exit(cpu); } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index ef15f35b10ed..753588755fee 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -456,7 +456,7 @@ recurse: out_free: if (b) { - kobject_unregister(&b->kobj); + kobject_put(&b->kobj); kfree(b); } return err; @@ -581,7 +581,7 @@ static void deallocate_threshold_block(unsigned int cpu, return; list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) { - kobject_unregister(&pos->kobj); + kobject_put(&pos->kobj); list_del(&pos->miscj); kfree(pos); } @@ -627,7 +627,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) deallocate_threshold_block(cpu, bank); free_out: - kobject_unregister(b->kobj); + kobject_put(b->kobj); kfree(b); per_cpu(threshold_banks, cpu)[bank] = NULL; } -- cgit v1.2.3 From af5ca3f4ec5cc4432a42a73b050dd8898ce8fd00 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 20 Dec 2007 02:09:39 +0100 Subject: Driver core: change sysdev classes to use dynamic kobject names All kobjects require a dynamically allocated name now. We no longer need to keep track if the name is statically assigned, we can just unconditionally free() all kobject names on cleanup. 
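The resulting pattern, taking one of the hunks below as a template: the set_kset_name() initializer disappears in favour of a plain .name string, and sysdev_class_register() turns that string into a dynamically allocated kobject name via kobject_set_name():

	/* before: static kobject name embedded in the kset */
	static struct sysdev_class timer_sysclass = {
		set_kset_name("timer"),
		.suspend = timer_suspend,
		.resume  = timer_resume,
	};

	/* after: plain string, made into a dynamic name at register time */
	static struct sysdev_class timer_sysclass = {
		.name    = "timer",
		.suspend = timer_suspend,
		.resume  = timer_resume,
	};
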
Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/time.c | 4 ++-- arch/arm/mach-integrator/integrator_ap.c | 2 +- arch/arm/mach-pxa/cm-x270.c | 2 +- arch/arm/mach-pxa/lpd270.c | 2 +- arch/arm/mach-pxa/lubbock.c | 2 +- arch/arm/mach-pxa/mainstone.c | 2 +- arch/arm/mach-s3c2410/s3c2410.c | 2 +- arch/arm/mach-s3c2412/s3c2412.c | 2 +- arch/arm/mach-s3c2440/mach-osiris.c | 2 +- arch/arm/mach-s3c2443/s3c2443.c | 2 +- arch/arm/mach-sa1100/irq.c | 2 +- arch/arm/oprofile/common.c | 2 +- arch/arm/plat-omap/gpio.c | 2 +- arch/arm/plat-s3c24xx/dma.c | 2 +- arch/arm/plat-s3c24xx/s3c244x.c | 4 ++-- arch/avr32/kernel/time.c | 2 +- arch/mips/kernel/i8259.c | 2 +- arch/powerpc/platforms/cell/spu_base.c | 2 +- arch/powerpc/platforms/powermac/pic.c | 2 +- arch/powerpc/sysdev/ipic.c | 2 +- arch/powerpc/sysdev/mpic.c | 2 +- arch/powerpc/sysdev/qe_lib/qe_ic.c | 2 +- arch/ppc/syslib/ipic.c | 2 +- arch/ppc/syslib/open_pic.c | 2 +- arch/ppc/syslib/open_pic2.c | 2 +- arch/s390/kernel/time.c | 2 +- arch/sh/drivers/dma/dma-sysfs.c | 2 +- arch/sh/kernel/time.c | 2 +- arch/x86/kernel/apic_32.c | 2 +- arch/x86/kernel/apic_64.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_64.c | 2 +- arch/x86/kernel/i8237.c | 2 +- arch/x86/kernel/i8259_32.c | 2 +- arch/x86/kernel/i8259_64.c | 2 +- arch/x86/kernel/io_apic_32.c | 2 +- arch/x86/kernel/io_apic_64.c | 2 +- arch/x86/kernel/nmi_32.c | 2 +- arch/x86/kernel/nmi_64.c | 2 +- arch/x86/oprofile/nmi_int.c | 2 +- drivers/acpi/pci_link.c | 2 +- drivers/base/class.c | 2 +- drivers/base/cpu.c | 2 +- drivers/base/memory.c | 2 +- drivers/base/node.c | 2 +- drivers/base/sys.c | 1 + drivers/edac/edac_module.c | 2 +- drivers/kvm/kvm_main.c | 2 +- drivers/macintosh/via-pmu.c | 2 +- drivers/scsi/libsas/sas_scsi_host.c | 2 +- include/linux/kobject.h | 13 ++----------- include/linux/sysdev.h | 1 + kernel/rtmutex-tester.c | 2 +- kernel/time/clocksource.c | 2 +- kernel/time/timekeeping.c | 2 +- lib/kobject.c | 14 +++++--------- 55 files changed, 62 insertions(+), 73 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 1533d3ecd7a0..f6f3689a86ee 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -195,7 +195,7 @@ static int leds_shutdown(struct sys_device *dev) } static struct sysdev_class leds_sysclass = { - set_kset_name("leds"), + .name = "leds", .shutdown = leds_shutdown, .suspend = leds_suspend, .resume = leds_resume, @@ -369,7 +369,7 @@ static int timer_resume(struct sys_device *dev) #endif static struct sysdev_class timer_sysclass = { - set_kset_name("timer"), + .name = "timer", .suspend = timer_suspend, .resume = timer_resume, }; diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c index 72280754354d..df37e93c6fc9 100644 --- a/arch/arm/mach-integrator/integrator_ap.c +++ b/arch/arm/mach-integrator/integrator_ap.c @@ -214,7 +214,7 @@ static int irq_resume(struct sys_device *dev) #endif static struct sysdev_class irq_class = { - set_kset_name("irq"), + .name = "irq", .suspend = irq_suspend, .resume = irq_resume, }; diff --git a/arch/arm/mach-pxa/cm-x270.c b/arch/arm/mach-pxa/cm-x270.c index 177664ccb2e2..a16349272f54 100644 --- a/arch/arm/mach-pxa/cm-x270.c +++ b/arch/arm/mach-pxa/cm-x270.c @@ -566,7 +566,7 @@ static int cmx270_resume(struct sys_device *dev) } static struct sysdev_class cmx270_pm_sysclass = { - set_kset_name("pm"), + .name = "pm", .resume = cmx270_resume, .suspend = cmx270_suspend, }; diff --git a/arch/arm/mach-pxa/lpd270.c 
b/arch/arm/mach-pxa/lpd270.c index 26116440a7c9..78ebad063cba 100644 --- a/arch/arm/mach-pxa/lpd270.c +++ b/arch/arm/mach-pxa/lpd270.c @@ -122,7 +122,7 @@ static int lpd270_irq_resume(struct sys_device *dev) } static struct sysdev_class lpd270_irq_sysclass = { - set_kset_name("cpld_irq"), + .name = "cpld_irq", .resume = lpd270_irq_resume, }; diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c index 011a1a72b61c..1d3112dc629e 100644 --- a/arch/arm/mach-pxa/lubbock.c +++ b/arch/arm/mach-pxa/lubbock.c @@ -126,7 +126,7 @@ static int lubbock_irq_resume(struct sys_device *dev) } static struct sysdev_class lubbock_irq_sysclass = { - set_kset_name("cpld_irq"), + .name = "cpld_irq", .resume = lubbock_irq_resume, }; diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c index a4bc3483cbb3..41d8c6cea62b 100644 --- a/arch/arm/mach-pxa/mainstone.c +++ b/arch/arm/mach-pxa/mainstone.c @@ -120,7 +120,7 @@ static int mainstone_irq_resume(struct sys_device *dev) } static struct sysdev_class mainstone_irq_sysclass = { - set_kset_name("cpld_irq"), + .name = "cpld_irq", .resume = mainstone_irq_resume, }; diff --git a/arch/arm/mach-s3c2410/s3c2410.c b/arch/arm/mach-s3c2410/s3c2410.c index e580303cb0ab..0e7991940f81 100644 --- a/arch/arm/mach-s3c2410/s3c2410.c +++ b/arch/arm/mach-s3c2410/s3c2410.c @@ -100,7 +100,7 @@ void __init s3c2410_init_clocks(int xtal) } struct sysdev_class s3c2410_sysclass = { - set_kset_name("s3c2410-core"), + .name = "s3c2410-core", }; static struct sys_device s3c2410_sysdev = { diff --git a/arch/arm/mach-s3c2412/s3c2412.c b/arch/arm/mach-s3c2412/s3c2412.c index 4f92a1562d77..265cd3f567a3 100644 --- a/arch/arm/mach-s3c2412/s3c2412.c +++ b/arch/arm/mach-s3c2412/s3c2412.c @@ -196,7 +196,7 @@ void __init s3c2412_init_clocks(int xtal) */ struct sysdev_class s3c2412_sysclass = { - set_kset_name("s3c2412-core"), + .name = "s3c2412-core", }; static int __init s3c2412_core_init(void) diff --git a/arch/arm/mach-s3c2440/mach-osiris.c b/arch/arm/mach-s3c2440/mach-osiris.c index c326983f4a8f..78af7664988b 100644 --- a/arch/arm/mach-s3c2440/mach-osiris.c +++ b/arch/arm/mach-s3c2440/mach-osiris.c @@ -312,7 +312,7 @@ static int osiris_pm_resume(struct sys_device *sd) #endif static struct sysdev_class osiris_pm_sysclass = { - set_kset_name("mach-osiris"), + .name = "mach-osiris", .suspend = osiris_pm_suspend, .resume = osiris_pm_resume, }; diff --git a/arch/arm/mach-s3c2443/s3c2443.c b/arch/arm/mach-s3c2443/s3c2443.c index 8d8117158d23..9ce490560af9 100644 --- a/arch/arm/mach-s3c2443/s3c2443.c +++ b/arch/arm/mach-s3c2443/s3c2443.c @@ -43,7 +43,7 @@ static struct map_desc s3c2443_iodesc[] __initdata = { }; struct sysdev_class s3c2443_sysclass = { - set_kset_name("s3c2443-core"), + .name = "s3c2443-core", }; static struct sys_device s3c2443_sysdev = { diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c index edf3347d9c5b..3dc17d7bf38e 100644 --- a/arch/arm/mach-sa1100/irq.c +++ b/arch/arm/mach-sa1100/irq.c @@ -283,7 +283,7 @@ static int sa1100irq_resume(struct sys_device *dev) } static struct sysdev_class sa1100irq_sysclass = { - set_kset_name("sa11x0-irq"), + .name = "sa11x0-irq", .suspend = sa1100irq_suspend, .resume = sa1100irq_resume, }; diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c index a9de727c9327..0a5cf3a6438b 100644 --- a/arch/arm/oprofile/common.c +++ b/arch/arm/oprofile/common.c @@ -96,7 +96,7 @@ static int op_arm_resume(struct sys_device *dev) } static struct sysdev_class oprofile_sysclass = { - 
set_kset_name("oprofile"), + .name = "oprofile", .resume = op_arm_resume, .suspend = op_arm_suspend, }; diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c index 6097753394ad..b2a87b8ef673 100644 --- a/arch/arm/plat-omap/gpio.c +++ b/arch/arm/plat-omap/gpio.c @@ -1455,7 +1455,7 @@ static int omap_gpio_resume(struct sys_device *dev) } static struct sysdev_class omap_gpio_sysclass = { - set_kset_name("gpio"), + .name = "gpio", .suspend = omap_gpio_suspend, .resume = omap_gpio_resume, }; diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c index 29696e46ed65..aae1b9cbaf44 100644 --- a/arch/arm/plat-s3c24xx/dma.c +++ b/arch/arm/plat-s3c24xx/dma.c @@ -1265,7 +1265,7 @@ static int s3c2410_dma_resume(struct sys_device *dev) #endif /* CONFIG_PM */ struct sysdev_class dma_sysclass = { - set_kset_name("s3c24xx-dma"), + .name = "s3c24xx-dma", .suspend = s3c2410_dma_suspend, .resume = s3c2410_dma_resume, }; diff --git a/arch/arm/plat-s3c24xx/s3c244x.c b/arch/arm/plat-s3c24xx/s3c244x.c index 3444b13afac5..f197bb3a2366 100644 --- a/arch/arm/plat-s3c24xx/s3c244x.c +++ b/arch/arm/plat-s3c24xx/s3c244x.c @@ -151,13 +151,13 @@ static int s3c244x_resume(struct sys_device *dev) /* Since the S3C2442 and S3C2440 share items, put both sysclasses here */ struct sysdev_class s3c2440_sysclass = { - set_kset_name("s3c2440-core"), + .name = "s3c2440-core", .suspend = s3c244x_suspend, .resume = s3c244x_resume }; struct sysdev_class s3c2442_sysclass = { - set_kset_name("s3c2442-core"), + .name = "s3c2442-core", .suspend = s3c244x_suspend, .resume = s3c244x_resume }; diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c index 7014a3571ec0..36a46c3ae308 100644 --- a/arch/avr32/kernel/time.c +++ b/arch/avr32/kernel/time.c @@ -214,7 +214,7 @@ void __init time_init(void) } static struct sysdev_class timer_class = { - set_kset_name("timer"), + .name = "timer", }; static struct sys_device timer_device = { diff --git a/arch/mips/kernel/i8259.c b/arch/mips/kernel/i8259.c index 471013577108..197d7977de35 100644 --- a/arch/mips/kernel/i8259.c +++ b/arch/mips/kernel/i8259.c @@ -238,7 +238,7 @@ static int i8259A_shutdown(struct sys_device *dev) } static struct sysdev_class i8259_sysdev_class = { - set_kset_name("i8259"), + .name = "i8259", .resume = i8259A_resume, .shutdown = i8259A_shutdown, }; diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index c83c3e3f5178..a08862203643 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -459,7 +459,7 @@ static int spu_shutdown(struct sys_device *sysdev) } static struct sysdev_class spu_sysdev_class = { - set_kset_name("spu"), + .name = "spu", .shutdown = spu_shutdown, }; diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 999f5e160897..84c0d4ef76a2 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -663,7 +663,7 @@ static int pmacpic_resume(struct sys_device *sysdev) #endif /* CONFIG_PM && CONFIG_PPC32 */ static struct sysdev_class pmacpic_sysclass = { - set_kset_name("pmac_pic"), + .name = "pmac_pic", }; static struct sys_device device_pmacpic = { diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index 05a56e55804c..e898ff4d2b97 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -725,7 +725,7 @@ unsigned int ipic_get_irq(void) } static struct sysdev_class ipic_sysclass = { - set_kset_name("ipic"), + .name = "ipic", }; static 
struct sys_device device_ipic = { diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index e47938899a92..212a94f5d34b 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1584,7 +1584,7 @@ static struct sysdev_class mpic_sysclass = { .resume = mpic_resume, .suspend = mpic_suspend, #endif - set_kset_name("mpic"), + .name = "mpic", }; static int mpic_init_sys(void) diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c index e1c0fd6dbc1a..f59444d3be75 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_ic.c +++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c @@ -483,7 +483,7 @@ int qe_ic_set_high_priority(unsigned int virq, unsigned int priority, int high) } static struct sysdev_class qe_ic_sysclass = { - set_kset_name("qe_ic"), + .name = "qe_ic", }; static struct sys_device device_qe_ic = { diff --git a/arch/ppc/syslib/ipic.c b/arch/ppc/syslib/ipic.c index 9192777d0f78..4f163e20939e 100644 --- a/arch/ppc/syslib/ipic.c +++ b/arch/ppc/syslib/ipic.c @@ -614,7 +614,7 @@ int ipic_get_irq(void) } static struct sysdev_class ipic_sysclass = { - set_kset_name("ipic"), + .name = "ipic", }; static struct sys_device device_ipic = { diff --git a/arch/ppc/syslib/open_pic.c b/arch/ppc/syslib/open_pic.c index 18ec94733293..da36522d327a 100644 --- a/arch/ppc/syslib/open_pic.c +++ b/arch/ppc/syslib/open_pic.c @@ -1043,7 +1043,7 @@ int openpic_resume(struct sys_device *sysdev) #endif /* CONFIG_PM */ static struct sysdev_class openpic_sysclass = { - set_kset_name("openpic"), + .name = "openpic", }; static struct sys_device device_openpic = { diff --git a/arch/ppc/syslib/open_pic2.c b/arch/ppc/syslib/open_pic2.c index d585207f9f77..449075a04798 100644 --- a/arch/ppc/syslib/open_pic2.c +++ b/arch/ppc/syslib/open_pic2.c @@ -666,7 +666,7 @@ int openpic2_resume(struct sys_device *sysdev) /* HACK ALERT */ static struct sysdev_class openpic2_sysclass = { - set_kset_name("openpic2"), + .name = "openpic2", }; static struct sys_device device_openpic2 = { diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 22b800ce2126..3bbac1293be4 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -1145,7 +1145,7 @@ static void etr_work_fn(struct work_struct *work) * Sysfs interface functions */ static struct sysdev_class etr_sysclass = { - set_kset_name("etr") + .name = "etr", }; static struct sys_device etr_port0_dev = { diff --git a/arch/sh/drivers/dma/dma-sysfs.c b/arch/sh/drivers/dma/dma-sysfs.c index eebcd4768bbf..51b57c0d1a3c 100644 --- a/arch/sh/drivers/dma/dma-sysfs.c +++ b/arch/sh/drivers/dma/dma-sysfs.c @@ -19,7 +19,7 @@ #include static struct sysdev_class dma_sysclass = { - set_kset_name("dma"), + .name = "dma", }; EXPORT_SYMBOL(dma_sysclass); diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c index a3a67d151e52..2bc04bfee738 100644 --- a/arch/sh/kernel/time.c +++ b/arch/sh/kernel/time.c @@ -174,7 +174,7 @@ int timer_resume(struct sys_device *dev) #endif static struct sysdev_class timer_sysclass = { - set_kset_name("timer"), + .name = "timer", .suspend = timer_suspend, .resume = timer_resume, }; diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index edb5108e5d0e..a56c782653be 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1530,7 +1530,7 @@ static int lapic_resume(struct sys_device *dev) */ static struct sysdev_class lapic_sysclass = { - set_kset_name("lapic"), + .name = "lapic", .resume = lapic_resume, .suspend = lapic_suspend, }; diff --git a/arch/x86/kernel/apic_64.c 
b/arch/x86/kernel/apic_64.c index f28ccb588fba..fa6cdee6d303 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -639,7 +639,7 @@ static int lapic_resume(struct sys_device *dev) } static struct sysdev_class lapic_sysclass = { - set_kset_name("lapic"), + .name = "lapic", .resume = lapic_resume, .suspend = lapic_suspend, }; diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 4b21d29fb5aa..242e8668dbeb 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -745,7 +745,7 @@ static void mce_restart(void) static struct sysdev_class mce_sysclass = { .resume = mce_resume, - set_kset_name("machinecheck"), + .name = "machinecheck", }; DEFINE_PER_CPU(struct sys_device, device_mce); diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c index 29313832df0c..dbd6c1d1b638 100644 --- a/arch/x86/kernel/i8237.c +++ b/arch/x86/kernel/i8237.c @@ -51,7 +51,7 @@ static int i8237A_suspend(struct sys_device *dev, pm_message_t state) } static struct sysdev_class i8237_sysdev_class = { - set_kset_name("i8237"), + .name = "i8237", .suspend = i8237A_suspend, .resume = i8237A_resume, }; diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c index f634fc715c99..5f3496d01984 100644 --- a/arch/x86/kernel/i8259_32.c +++ b/arch/x86/kernel/i8259_32.c @@ -258,7 +258,7 @@ static int i8259A_shutdown(struct sys_device *dev) } static struct sysdev_class i8259_sysdev_class = { - set_kset_name("i8259"), + .name = "i8259", .suspend = i8259A_suspend, .resume = i8259A_resume, .shutdown = i8259A_shutdown, diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c index 3f27ea0b9816..ba6d57286f56 100644 --- a/arch/x86/kernel/i8259_64.c +++ b/arch/x86/kernel/i8259_64.c @@ -370,7 +370,7 @@ static int i8259A_shutdown(struct sys_device *dev) } static struct sysdev_class i8259_sysdev_class = { - set_kset_name("i8259"), + .name = "i8259", .suspend = i8259A_suspend, .resume = i8259A_resume, .shutdown = i8259A_shutdown, diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index a6b1490e00c4..ab77f1905469 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -2401,7 +2401,7 @@ static int ioapic_resume(struct sys_device *dev) } static struct sysdev_class ioapic_sysdev_class = { - set_kset_name("ioapic"), + .name = "ioapic", .suspend = ioapic_suspend, .resume = ioapic_resume, }; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index cbac1670c7c3..23a3ac06a23e 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1850,7 +1850,7 @@ static int ioapic_resume(struct sys_device *dev) } static struct sysdev_class ioapic_sysdev_class = { - set_kset_name("ioapic"), + .name = "ioapic", .suspend = ioapic_suspend, .resume = ioapic_resume, }; diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 852db2906921..4f4bfd3a88b6 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -176,7 +176,7 @@ static int lapic_nmi_resume(struct sys_device *dev) static struct sysdev_class nmi_sysclass = { - set_kset_name("lapic_nmi"), + .name = "lapic_nmi", .resume = lapic_nmi_resume, .suspend = lapic_nmi_suspend, }; diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c index 4253c4e8849c..c3d1476b6a11 100644 --- a/arch/x86/kernel/nmi_64.c +++ b/arch/x86/kernel/nmi_64.c @@ -211,7 +211,7 @@ static int lapic_nmi_resume(struct sys_device *dev) } static struct sysdev_class nmi_sysclass = { - 
set_kset_name("lapic_nmi"), + .name = "lapic_nmi", .resume = lapic_nmi_resume, .suspend = lapic_nmi_suspend, }; diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 944bbcdd2b8d..c8ab79ef4276 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -51,7 +51,7 @@ static int nmi_resume(struct sys_device *dev) static struct sysdev_class oprofile_sysclass = { - set_kset_name("oprofile"), + .name = "oprofile", .resume = nmi_resume, .suspend = nmi_suspend, }; diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index c9f526e55392..5400ea173f6f 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -911,7 +911,7 @@ __setup("acpi_irq_balance", acpi_irq_balance_set); /* FIXME: we will remove this interface after all drivers call pci_disable_device */ static struct sysdev_class irqrouter_sysdev_class = { - set_kset_name("irqrouter"), + .name = "irqrouter", .resume = irqrouter_resume, }; diff --git a/drivers/base/class.c b/drivers/base/class.c index 61fd26cc9f0e..b962a76875d2 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -466,7 +466,6 @@ static struct kset_uevent_ops class_uevent_ops = { * entirely soon. */ static struct kset class_obj_subsys = { - .kobj = { .k_name = "class_obj", }, .uevent_ops = &class_uevent_ops, }; @@ -872,6 +871,7 @@ int __init classes_init(void) /* ick, this is ugly, the things we go through to keep from showing up * in sysfs... */ kset_init(&class_obj_subsys); + kobject_set_name(&class_obj_subsys.kobj, "class_obj"); if (!class_obj_subsys.kobj.parent) class_obj_subsys.kobj.parent = &class_obj_subsys.kobj; return 0; diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 40545071e3c9..c5885f5ce0ac 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -14,7 +14,7 @@ #include "base.h" struct sysdev_class cpu_sysdev_class = { - set_kset_name("cpu"), + .name = "cpu", }; EXPORT_SYMBOL(cpu_sysdev_class); diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 7868707c7eda..7ae413fdd5fc 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -26,7 +26,7 @@ #define MEMORY_CLASS_NAME "memory" static struct sysdev_class memory_sysdev_class = { - set_kset_name(MEMORY_CLASS_NAME), + .name = MEMORY_CLASS_NAME, }; static const char *memory_uevent_name(struct kset *kset, struct kobject *kobj) diff --git a/drivers/base/node.c b/drivers/base/node.c index 88eeed72b5d6..e59861f18ce5 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -15,7 +15,7 @@ #include static struct sysdev_class node_class = { - set_kset_name("node"), + .name = "node", }; diff --git a/drivers/base/sys.c b/drivers/base/sys.c index e666441dd76b..2f79c55acdcc 100644 --- a/drivers/base/sys.c +++ b/drivers/base/sys.c @@ -136,6 +136,7 @@ int sysdev_class_register(struct sysdev_class * cls) cls->kset.kobj.parent = &system_kset->kobj; cls->kset.kobj.ktype = &ktype_sysdev_class; cls->kset.kobj.kset = system_kset; + kobject_set_name(&cls->kset.kobj, cls->name); return kset_register(&cls->kset); } diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c index e0c4a4086055..7e1374afd967 100644 --- a/drivers/edac/edac_module.c +++ b/drivers/edac/edac_module.c @@ -31,7 +31,7 @@ struct workqueue_struct *edac_workqueue; * need to export to other files in this modules */ static struct sysdev_class edac_class = { - set_kset_name("edac"), + .name = "edac", }; static int edac_class_valid; diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 47c10b8f89b3..c0f372f1d761 100644 --- 
a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -3451,7 +3451,7 @@ static int kvm_resume(struct sys_device *dev) } static struct sysdev_class kvm_sysdev_class = { - set_kset_name("kvm"), + .name = "kvm", .suspend = kvm_suspend, .resume = kvm_resume, }; diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 6123c70153d3..ac420b17e16f 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -2796,7 +2796,7 @@ static int pmu_sys_resume(struct sys_device *sysdev) #endif /* CONFIG_PM_SLEEP && CONFIG_PPC32 */ static struct sysdev_class pmu_sysclass = { - set_kset_name("pmu"), + .name = "pmu", }; static struct sys_device device_pmu = { diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 7663841eb4cf..a3fdc57e2673 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -464,7 +464,7 @@ int sas_eh_bus_reset_handler(struct scsi_cmnd *cmd) res = sas_phy_reset(phy, 1); if (res) SAS_DPRINTK("Bus reset of %s failed 0x%x\n", - phy->dev.kobj.k_name, + kobject_name(&phy->dev.kobj), res); if (res == TMF_RESP_FUNC_SUCC || res == TMF_RESP_FUNC_COMPLETE) return SUCCESS; diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 504ac0eb4412..4adbe1d83081 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -61,7 +61,7 @@ enum kobject_action { }; struct kobject { - const char * k_name; + const char *name; struct kref kref; struct list_head entry; struct kobject * parent; @@ -69,7 +69,6 @@ struct kobject { struct kobj_type * ktype; struct sysfs_dirent * sd; unsigned int state_initialized:1; - unsigned int state_name_set:1; unsigned int state_in_sysfs:1; unsigned int state_add_uevent_sent:1; unsigned int state_remove_uevent_sent:1; @@ -80,7 +79,7 @@ extern int kobject_set_name(struct kobject *, const char *, ...) static inline const char * kobject_name(const struct kobject * kobj) { - return kobj->k_name; + return kobj->name; } extern void kobject_init(struct kobject *kobj, struct kobj_type *ktype); @@ -189,14 +188,6 @@ static inline struct kobj_type *get_ktype(struct kobject *kobj) extern struct kobject * kset_find_obj(struct kset *, const char *); - -/* - * Use this when initializing an embedded kset with no other - * fields to initialize. 
- */ -#define set_kset_name(str) .kset = { .kobj = { .k_name = str } } - - /* The global /sys/kernel/ kobject for people to chain off of */ extern struct kobject *kernel_kobj; /* The global /sys/hypervisor/ kobject for people to chain off of */ diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h index e285746588d6..f752e73bf977 100644 --- a/include/linux/sysdev.h +++ b/include/linux/sysdev.h @@ -29,6 +29,7 @@ struct sys_device; struct sysdev_class { + const char *name; struct list_head drivers; /* Default operations for these types of devices */ diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index e3055ba69159..092e4c620af9 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c @@ -394,7 +394,7 @@ static SYSDEV_ATTR(status, 0600, sysfs_test_status, NULL); static SYSDEV_ATTR(command, 0600, NULL, sysfs_test_command); static struct sysdev_class rttest_sysclass = { - set_kset_name("rttest"), + .name = "rttest", }; static int init_test_thread(int id) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c8a9d13874df..8d6125ad2cf0 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -441,7 +441,7 @@ static SYSDEV_ATTR(available_clocksource, 0600, sysfs_show_available_clocksources, NULL); static struct sysdev_class clocksource_sysclass = { - set_kset_name("clocksource"), + .name = "clocksource", }; static struct sys_device device_clocksource = { diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e5e466b27598..ab46ae8c062b 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -335,9 +335,9 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) /* sysfs resume/suspend bits for timekeeping */ static struct sysdev_class timekeeping_sysclass = { + .name = "timekeeping", .resume = timekeeping_resume, .suspend = timekeeping_suspend, - set_kset_name("timekeeping"), }; static struct sys_device device_timer = { diff --git a/lib/kobject.c b/lib/kobject.c index a0773734545c..8dc32454661d 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -165,7 +165,7 @@ static int kobject_add_internal(struct kobject *kobj) if (!kobj) return -ENOENT; - if (!kobj->k_name || !kobj->k_name[0]) { + if (!kobj->name || !kobj->name[0]) { pr_debug("kobject: (%p): attempted to be registered with empty " "name!\n", kobj); WARN_ON(1); @@ -228,13 +228,11 @@ static int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, if (!name) return -ENOMEM; - /* Free the old name, if necessary. 
*/ - kfree(kobj->k_name); + kfree(kobj->name); /* Now, set the new name */ - kobj->k_name = name; - kobj->state_name_set = 1; + kobj->name = name; return 0; } @@ -295,7 +293,6 @@ void kobject_init(struct kobject *kobj, struct kobj_type *ktype) kref_init(&kobj->kref); INIT_LIST_HEAD(&kobj->entry); kobj->ktype = ktype; - kobj->state_name_set = 0; kobj->state_in_sysfs = 0; kobj->state_add_uevent_sent = 0; kobj->state_remove_uevent_sent = 0; @@ -551,8 +548,7 @@ struct kobject * kobject_get(struct kobject * kobj) static void kobject_cleanup(struct kobject *kobj) { struct kobj_type *t = get_ktype(kobj); - const char *name = kobj->k_name; - int name_set = kobj->state_name_set; + const char *name = kobj->name; pr_debug("kobject: '%s' (%p): %s\n", kobject_name(kobj), kobj, __FUNCTION__); @@ -583,7 +579,7 @@ static void kobject_cleanup(struct kobject *kobj) } /* free name if we allocated it */ - if (name_set && name) { + if (name) { pr_debug("kobject: '%s': free name\n", name); kfree(name); } -- cgit v1.2.3 From 86ef5c9a8edd78e6bf92879f32329d89b2d55b5a Mon Sep 17 00:00:00 2001 From: Gautham R Shenoy Date: Fri, 25 Jan 2008 21:08:02 +0100 Subject: cpu-hotplug: replace lock_cpu_hotplug() with get_online_cpus() Replace all lock_cpu_hotplug/unlock_cpu_hotplug from the kernel and use get_online_cpus and put_online_cpus instead as it highlights the refcount semantics in these operations. The new API guarantees protection against the cpu-hotplug operation, but it doesn't guarantee serialized access to any of the local data structures. Hence the changes needs to be reviewed. In case of pseries_add_processor/pseries_remove_processor, use cpu_maps_update_begin()/cpu_maps_update_done() as we're modifying the cpu_present_map there. Signed-off-by: Gautham R Shenoy Signed-off-by: Ingo Molnar --- Documentation/cpu-hotplug.txt | 11 ++++++----- arch/mips/kernel/mips-mt-fpaff.c | 10 +++++----- arch/powerpc/platforms/pseries/hotplug-cpu.c | 8 ++++---- arch/powerpc/platforms/pseries/rtasd.c | 8 ++++---- arch/x86/kernel/cpu/mtrr/main.c | 8 ++++---- arch/x86/kernel/microcode.c | 16 ++++++++-------- drivers/lguest/x86/core.c | 8 ++++---- drivers/s390/char/sclp_config.c | 4 ++-- include/linux/cpu.h | 8 ++++---- kernel/cpu.c | 10 +++++----- kernel/cpuset.c | 14 +++++++------- kernel/rcutorture.c | 6 +++--- kernel/sched.c | 4 ++-- kernel/stop_machine.c | 4 ++-- net/core/flow.c | 4 ++-- 15 files changed, 62 insertions(+), 61 deletions(-) (limited to 'arch/x86/kernel') diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index a741f658a3c9..fb94f5a71b68 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -109,12 +109,13 @@ Never use anything other than cpumask_t to represent bitmap of CPUs. for_each_cpu_mask(x,mask) - Iterate over some random collection of cpu mask. #include - lock_cpu_hotplug() and unlock_cpu_hotplug(): + get_online_cpus() and put_online_cpus(): -The above calls are used to inhibit cpu hotplug operations. While holding the -cpucontrol mutex, cpu_online_map will not change. If you merely need to avoid -cpus going away, you could also use preempt_disable() and preempt_enable() -for those sections. Just remember the critical section cannot call any +The above calls are used to inhibit cpu hotplug operations. While the +cpu_hotplug.refcount is non zero, the cpu_online_map will not change. +If you merely need to avoid cpus going away, you could also use +preempt_disable() and preempt_enable() for those sections. 
+Just remember the critical section cannot call any function that can sleep or schedule this process away. The preempt_disable() will work as long as stop_machine_run() is used to take a cpu down. diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index 892665bb12b1..bb4f00c0cbe9 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -58,13 +58,13 @@ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len, if (copy_from_user(&new_mask, user_mask_ptr, sizeof(new_mask))) return -EFAULT; - lock_cpu_hotplug(); + get_online_cpus(); read_lock(&tasklist_lock); p = find_process_by_pid(pid); if (!p) { read_unlock(&tasklist_lock); - unlock_cpu_hotplug(); + put_online_cpus(); return -ESRCH; } @@ -106,7 +106,7 @@ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len, out_unlock: put_task_struct(p); - unlock_cpu_hotplug(); + put_online_cpus(); return retval; } @@ -125,7 +125,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, if (len < real_len) return -EINVAL; - lock_cpu_hotplug(); + get_online_cpus(); read_lock(&tasklist_lock); retval = -ESRCH; @@ -140,7 +140,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, out_unlock: read_unlock(&tasklist_lock); - unlock_cpu_hotplug(); + put_online_cpus(); if (retval) return retval; if (copy_to_user(user_mask_ptr, &mask, real_len)) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 412e6b42986f..c4ad54e0f288 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -153,7 +153,7 @@ static int pseries_add_processor(struct device_node *np) for (i = 0; i < nthreads; i++) cpu_set(i, tmp); - lock_cpu_hotplug(); + cpu_maps_update_begin(); BUG_ON(!cpus_subset(cpu_present_map, cpu_possible_map)); @@ -190,7 +190,7 @@ static int pseries_add_processor(struct device_node *np) } err = 0; out_unlock: - unlock_cpu_hotplug(); + cpu_maps_update_done(); return err; } @@ -211,7 +211,7 @@ static void pseries_remove_processor(struct device_node *np) nthreads = len / sizeof(u32); - lock_cpu_hotplug(); + cpu_maps_update_begin(); for (i = 0; i < nthreads; i++) { for_each_present_cpu(cpu) { if (get_hard_smp_processor_id(cpu) != intserv[i]) @@ -225,7 +225,7 @@ static void pseries_remove_processor(struct device_node *np) printk(KERN_WARNING "Could not find cpu to remove " "with physical id 0x%x\n", intserv[i]); } - unlock_cpu_hotplug(); + cpu_maps_update_done(); } static int pseries_smp_notifier(struct notifier_block *nb, diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c index 73401c820110..e3078ce41518 100644 --- a/arch/powerpc/platforms/pseries/rtasd.c +++ b/arch/powerpc/platforms/pseries/rtasd.c @@ -382,7 +382,7 @@ static void do_event_scan_all_cpus(long delay) { int cpu; - lock_cpu_hotplug(); + get_online_cpus(); cpu = first_cpu(cpu_online_map); for (;;) { set_cpus_allowed(current, cpumask_of_cpu(cpu)); @@ -390,15 +390,15 @@ static void do_event_scan_all_cpus(long delay) set_cpus_allowed(current, CPU_MASK_ALL); /* Drop hotplug lock, and sleep for the specified delay */ - unlock_cpu_hotplug(); + put_online_cpus(); msleep_interruptible(delay); - lock_cpu_hotplug(); + get_online_cpus(); cpu = next_cpu(cpu, cpu_online_map); if (cpu == NR_CPUS) break; } - unlock_cpu_hotplug(); + put_online_cpus(); } static int rtasd(void *unused) diff --git a/arch/x86/kernel/cpu/mtrr/main.c 
b/arch/x86/kernel/cpu/mtrr/main.c index 3b20613325dc..beb45c9c0835 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -349,7 +349,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, replace = -1; /* No CPU hotplug when we change MTRR entries */ - lock_cpu_hotplug(); + get_online_cpus(); /* Search for existing MTRR */ mutex_lock(&mtrr_mutex); for (i = 0; i < num_var_ranges; ++i) { @@ -405,7 +405,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, error = i; out: mutex_unlock(&mtrr_mutex); - unlock_cpu_hotplug(); + put_online_cpus(); return error; } @@ -495,7 +495,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) max = num_var_ranges; /* No CPU hotplug when we change MTRR entries */ - lock_cpu_hotplug(); + get_online_cpus(); mutex_lock(&mtrr_mutex); if (reg < 0) { /* Search for existing MTRR */ @@ -536,7 +536,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) error = reg; out: mutex_unlock(&mtrr_mutex); - unlock_cpu_hotplug(); + put_online_cpus(); return error; } /** diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 09c315214a5e..40cfd5488719 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -436,7 +436,7 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_ return -EINVAL; } - lock_cpu_hotplug(); + get_online_cpus(); mutex_lock(µcode_mutex); user_buffer = (void __user *) buf; @@ -447,7 +447,7 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_ ret = (ssize_t)len; mutex_unlock(µcode_mutex); - unlock_cpu_hotplug(); + put_online_cpus(); return ret; } @@ -658,14 +658,14 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz) old = current->cpus_allowed; - lock_cpu_hotplug(); + get_online_cpus(); set_cpus_allowed(current, cpumask_of_cpu(cpu)); mutex_lock(µcode_mutex); if (uci->valid) err = cpu_request_microcode(cpu); mutex_unlock(µcode_mutex); - unlock_cpu_hotplug(); + put_online_cpus(); set_cpus_allowed(current, old); } if (err) @@ -817,9 +817,9 @@ static int __init microcode_init (void) return PTR_ERR(microcode_pdev); } - lock_cpu_hotplug(); + get_online_cpus(); error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); - unlock_cpu_hotplug(); + put_online_cpus(); if (error) { microcode_dev_exit(); platform_device_unregister(microcode_pdev); @@ -839,9 +839,9 @@ static void __exit microcode_exit (void) unregister_hotcpu_notifier(&mc_cpu_notifier); - lock_cpu_hotplug(); + get_online_cpus(); sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); - unlock_cpu_hotplug(); + put_online_cpus(); platform_device_unregister(microcode_pdev); } diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 482aec2a9631..96d0fd07c57d 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -459,7 +459,7 @@ void __init lguest_arch_host_init(void) /* We don't need the complexity of CPUs coming and going while we're * doing this. */ - lock_cpu_hotplug(); + get_online_cpus(); if (cpu_has_pge) { /* We have a broader idea of "global". */ /* Remember that this was originally set (for cleanup). */ cpu_had_pge = 1; @@ -469,20 +469,20 @@ void __init lguest_arch_host_init(void) /* Turn off the feature in the global feature set. 
*/ clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); } - unlock_cpu_hotplug(); + put_online_cpus(); }; /*:*/ void __exit lguest_arch_host_fini(void) { /* If we had PGE before we started, turn it back on now. */ - lock_cpu_hotplug(); + get_online_cpus(); if (cpu_had_pge) { set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); /* adjust_pge's argument "1" means set PGE. */ on_each_cpu(adjust_pge, (void *)1, 0, 1); } - unlock_cpu_hotplug(); + put_online_cpus(); } diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index 5322e5e54a98..9dc77f14fa52 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -29,12 +29,12 @@ static void sclp_cpu_capability_notify(struct work_struct *work) struct sys_device *sysdev; printk(KERN_WARNING TAG "cpu capability changed.\n"); - lock_cpu_hotplug(); + get_online_cpus(); for_each_online_cpu(cpu) { sysdev = get_cpu_sysdev(cpu); kobject_uevent(&sysdev->kobj, KOBJ_CHANGE); } - unlock_cpu_hotplug(); + put_online_cpus(); } static void sclp_conf_receiver_fn(struct evbuf_header *evbuf) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index a40247e4d462..3a3ff1c5cbef 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -100,8 +100,8 @@ static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex) mutex_unlock(cpu_hp_mutex); } -extern void lock_cpu_hotplug(void); -extern void unlock_cpu_hotplug(void); +extern void get_online_cpus(void); +extern void put_online_cpus(void); #define hotcpu_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ { .notifier_call = fn, .priority = pri }; \ @@ -118,8 +118,8 @@ static inline void cpuhotplug_mutex_lock(struct mutex *cpu_hp_mutex) static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex) { } -#define lock_cpu_hotplug() do { } while (0) -#define unlock_cpu_hotplug() do { } while (0) +#define get_online_cpus() do { } while (0) +#define put_online_cpus() do { } while (0) #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) /* These aren't inline functions due to a GCC bug. */ #define register_hotcpu_notifier(nb) ({ (void)(nb); 0; }) diff --git a/kernel/cpu.c b/kernel/cpu.c index 656dc3fcbbae..b0c4152995f8 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -48,7 +48,7 @@ void __init cpu_hotplug_init(void) #ifdef CONFIG_HOTPLUG_CPU -void lock_cpu_hotplug(void) +void get_online_cpus(void) { might_sleep(); if (cpu_hotplug.active_writer == current) @@ -58,9 +58,9 @@ void lock_cpu_hotplug(void) mutex_unlock(&cpu_hotplug.lock); } -EXPORT_SYMBOL_GPL(lock_cpu_hotplug); +EXPORT_SYMBOL_GPL(get_online_cpus); -void unlock_cpu_hotplug(void) +void put_online_cpus(void) { if (cpu_hotplug.active_writer == current) return; @@ -73,7 +73,7 @@ void unlock_cpu_hotplug(void) mutex_unlock(&cpu_hotplug.lock); } -EXPORT_SYMBOL_GPL(unlock_cpu_hotplug); +EXPORT_SYMBOL_GPL(put_online_cpus); #endif /* CONFIG_HOTPLUG_CPU */ @@ -110,7 +110,7 @@ void cpu_maps_update_done(void) * non zero and goes to sleep again. * * However, this is very difficult to achieve in practice since - * lock_cpu_hotplug() not an api which is called all that often. + * get_online_cpus() not an api which is called all that often. * */ static void cpu_hotplug_begin(void) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 50f5dc463688..cfaf6419d817 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -537,10 +537,10 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b) * * Call with cgroup_mutex held. 
May take callback_mutex during * call due to the kfifo_alloc() and kmalloc() calls. May nest - * a call to the lock_cpu_hotplug()/unlock_cpu_hotplug() pair. + * a call to the get_online_cpus()/put_online_cpus() pair. * Must not be called holding callback_mutex, because we must not - * call lock_cpu_hotplug() while holding callback_mutex. Elsewhere - * the kernel nests callback_mutex inside lock_cpu_hotplug() calls. + * call get_online_cpus() while holding callback_mutex. Elsewhere + * the kernel nests callback_mutex inside get_online_cpus() calls. * So the reverse nesting would risk an ABBA deadlock. * * The three key local variables below are: @@ -691,9 +691,9 @@ restart: rebuild: /* Have scheduler rebuild sched domains */ - lock_cpu_hotplug(); + get_online_cpus(); partition_sched_domains(ndoms, doms); - unlock_cpu_hotplug(); + put_online_cpus(); done: if (q && !IS_ERR(q)) @@ -1617,10 +1617,10 @@ static struct cgroup_subsys_state *cpuset_create( * * If the cpuset being removed has its flag 'sched_load_balance' * enabled, then simulate turning sched_load_balance off, which - * will call rebuild_sched_domains(). The lock_cpu_hotplug() + * will call rebuild_sched_domains(). The get_online_cpus() * call in rebuild_sched_domains() must not be made while holding * callback_mutex. Elsewhere the kernel nests callback_mutex inside - * lock_cpu_hotplug() calls. So the reverse nesting would risk an + * get_online_cpus() calls. So the reverse nesting would risk an * ABBA deadlock. */ diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index c3e165c2318f..fd599829e72a 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -726,11 +726,11 @@ static void rcu_torture_shuffle_tasks(void) cpumask_t tmp_mask = CPU_MASK_ALL; int i; - lock_cpu_hotplug(); + get_online_cpus(); /* No point in shuffling if there is only one online CPU (ex: UP) */ if (num_online_cpus() == 1) { - unlock_cpu_hotplug(); + put_online_cpus(); return; } @@ -762,7 +762,7 @@ static void rcu_torture_shuffle_tasks(void) else rcu_idle_cpu--; - unlock_cpu_hotplug(); + put_online_cpus(); } /* Shuffle tasks across CPUs, with the intent of allowing each CPU in the diff --git a/kernel/sched.c b/kernel/sched.c index 86e55a9c2de6..672aa68bfeac 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7152,7 +7152,7 @@ static int load_balance_monitor(void *unused) int i, cpu, balanced = 1; /* Prevent cpus going down or coming up */ - lock_cpu_hotplug(); + get_online_cpus(); /* lockout changes to doms_cur[] array */ lock_doms_cur(); /* @@ -7186,7 +7186,7 @@ static int load_balance_monitor(void *unused) rcu_read_unlock(); unlock_doms_cur(); - unlock_cpu_hotplug(); + put_online_cpus(); if (!balanced) timeout = sysctl_sched_min_bal_int_shares; diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 319821ef78af..51b5ee53571a 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -203,13 +203,13 @@ int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu) int ret; /* No CPUs can come up or down during this. */ - lock_cpu_hotplug(); + get_online_cpus(); p = __stop_machine_run(fn, data, cpu); if (!IS_ERR(p)) ret = kthread_stop(p); else ret = PTR_ERR(p); - unlock_cpu_hotplug(); + put_online_cpus(); return ret; } diff --git a/net/core/flow.c b/net/core/flow.c index 3ed2b4b1d6d4..6489f4e24ecf 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -293,7 +293,7 @@ void flow_cache_flush(void) static DEFINE_MUTEX(flow_flush_sem); /* Don't want cpus going down or up during this. 
*/ - lock_cpu_hotplug(); + get_online_cpus(); mutex_lock(&flow_flush_sem); atomic_set(&info.cpuleft, num_online_cpus()); init_completion(&info.completion); @@ -305,7 +305,7 @@ void flow_cache_flush(void) wait_for_completion(&info.completion); mutex_unlock(&flow_flush_sem); - unlock_cpu_hotplug(); + put_online_cpus(); } static void __devinit flow_cache_cpu_prepare(int cpu) -- cgit v1.2.3 From 8f4d37ec073c17e2d4aa8851df5837d798606d6f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 25 Jan 2008 21:08:29 +0100 Subject: sched: high-res preemption tick Use HR-timers (when available) to deliver an accurate preemption tick. The regular scheduler tick that runs at 1/HZ can be too coarse when nice level are used. The fairness system will still keep the cpu utilisation 'fair' by then delaying the task that got an excessive amount of CPU time but try to minimize this by delivering preemption points spot-on. The average frequency of this extra interrupt is sched_latency / nr_latency. Which need not be higher than 1/HZ, its just that the distribution within the sched_latency period is important. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 6 +- arch/x86/kernel/signal_32.c | 3 + arch/x86/kernel/signal_64.c | 3 + include/asm-x86/thread_info_32.h | 2 + include/asm-x86/thread_info_64.h | 5 + include/linux/hrtimer.h | 9 ++ include/linux/sched.h | 3 +- kernel/Kconfig.hz | 2 + kernel/sched.c | 210 +++++++++++++++++++++++++++++++++++++-- kernel/sched_fair.c | 69 ++++++++++++- kernel/sched_idletask.c | 2 +- kernel/sched_rt.c | 2 +- 12 files changed, 295 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 3a058bb16409..e70f3881d7e4 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -283,7 +283,7 @@ sysret_careful: sysret_signal: TRACE_IRQS_ON sti - testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx + testl $_TIF_DO_NOTIFY_MASK,%edx jz 1f /* Really a signal */ @@ -377,7 +377,7 @@ int_very_careful: jmp int_restore_rest int_signal: - testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx + testl $_TIF_DO_NOTIFY_MASK,%edx jz 1f movq %rsp,%rdi # &ptregs -> arg1 xorl %esi,%esi # oldset -> arg2 @@ -603,7 +603,7 @@ retint_careful: jmp retint_check retint_signal: - testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx + testl $_TIF_DO_NOTIFY_MASK,%edx jz retint_swapgs TRACE_IRQS_ON sti diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 9bdd83022f5f..20f29e4c1d33 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -658,6 +658,9 @@ void do_notify_resume(struct pt_regs *regs, void *_unused, /* deal with pending signal delivery */ if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) do_signal(regs); + + if (thread_info_flags & _TIF_HRTICK_RESCHED) + hrtick_resched(); clear_thread_flag(TIF_IRET); } diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ab086b0357fc..38d806467c0f 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -480,6 +480,9 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) /* deal with pending signal delivery */ if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK)) do_signal(regs); + + if (thread_info_flags & _TIF_HRTICK_RESCHED) + hrtick_resched(); } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) diff --git a/include/asm-x86/thread_info_32.h 
b/include/asm-x86/thread_info_32.h index 22a8cbcd35e2..ef58fd2a6eb0 100644 --- a/include/asm-x86/thread_info_32.h +++ b/include/asm-x86/thread_info_32.h @@ -132,6 +132,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */ #define TIF_SECCOMP 7 /* secure computing */ #define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal() */ +#define TIF_HRTICK_RESCHED 9 /* reprogram hrtick timer */ #define TIF_MEMDIE 16 #define TIF_DEBUG 17 /* uses debug registers */ #define TIF_IO_BITMAP 18 /* uses I/O bitmap */ @@ -147,6 +148,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_AUDIT (1<base->get_time(); } +static inline int hrtimer_is_hres_active(struct hrtimer *timer) +{ + return timer->base->cpu_base->hres_active; +} + /* * The resolution of the clocks. The resolution value is returned in * the clock_getres() system call to give application programmers an @@ -248,6 +253,10 @@ static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) return timer->base->softirq_time; } +static inline int hrtimer_is_hres_active(struct hrtimer *timer) +{ + return 0; +} #endif extern ktime_t ktime_get(void); diff --git a/include/linux/sched.h b/include/linux/sched.h index 7907845c2348..43e0339d65fc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -257,6 +257,7 @@ extern void trap_init(void); extern void account_process_tick(struct task_struct *task, int user); extern void update_process_times(int user); extern void scheduler_tick(void); +extern void hrtick_resched(void); extern void sched_show_task(struct task_struct *p); @@ -849,7 +850,7 @@ struct sched_class { #endif void (*set_curr_task) (struct rq *rq); - void (*task_tick) (struct rq *rq, struct task_struct *p); + void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); void (*task_new) (struct rq *rq, struct task_struct *p); void (*set_cpus_allowed)(struct task_struct *p, cpumask_t *newmask); diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz index 4af15802ccd4..526128a2e622 100644 --- a/kernel/Kconfig.hz +++ b/kernel/Kconfig.hz @@ -54,3 +54,5 @@ config HZ default 300 if HZ_300 default 1000 if HZ_1000 +config SCHED_HRTICK + def_bool HIGH_RES_TIMERS && X86 diff --git a/kernel/sched.c b/kernel/sched.c index 6ee37602a6d8..17f93d3eda91 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include @@ -451,6 +452,12 @@ struct rq { struct list_head migration_queue; #endif +#ifdef CONFIG_SCHED_HRTICK + unsigned long hrtick_flags; + ktime_t hrtick_expire; + struct hrtimer hrtick_timer; +#endif + #ifdef CONFIG_SCHEDSTATS /* latency stats */ struct sched_info rq_sched_info; @@ -572,6 +579,8 @@ enum { SCHED_FEAT_START_DEBIT = 4, SCHED_FEAT_TREE_AVG = 8, SCHED_FEAT_APPROX_AVG = 16, + SCHED_FEAT_HRTICK = 32, + SCHED_FEAT_DOUBLE_TICK = 64, }; const_debug unsigned int sysctl_sched_features = @@ -579,7 +588,9 @@ const_debug unsigned int sysctl_sched_features = SCHED_FEAT_WAKEUP_PREEMPT * 1 | SCHED_FEAT_START_DEBIT * 1 | SCHED_FEAT_TREE_AVG * 0 | - SCHED_FEAT_APPROX_AVG * 0; + SCHED_FEAT_APPROX_AVG * 0 | + SCHED_FEAT_HRTICK * 1 | + SCHED_FEAT_DOUBLE_TICK * 0; #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x) @@ -796,6 +807,173 @@ void sched_clock_idle_wakeup_event(u64 delta_ns) } EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); +static void __resched_task(struct task_struct *p, int tif_bit); + +static inline void resched_task(struct task_struct *p) +{ + 
__resched_task(p, TIF_NEED_RESCHED); +} + +#ifdef CONFIG_SCHED_HRTICK +/* + * Use HR-timers to deliver accurate preemption points. + * + * It's all a bit involved since we cannot program an hrt while holding the + * rq->lock. So what we do is store a state in rq->hrtick_* and ask for a + * reschedule event. + * + * When we get rescheduled we reprogram the hrtick_timer outside of the + * rq->lock. + */ +static inline void resched_hrt(struct task_struct *p) +{ + __resched_task(p, TIF_HRTICK_RESCHED); +} + +static inline void resched_rq(struct rq *rq) +{ + unsigned long flags; + + spin_lock_irqsave(&rq->lock, flags); + resched_task(rq->curr); + spin_unlock_irqrestore(&rq->lock, flags); +} + +enum { + HRTICK_SET, /* re-program hrtick_timer */ + HRTICK_RESET, /* not a new slice */ +}; + +/* + * Use hrtick when: + * - enabled by features + * - hrtimer is actually high res + */ +static inline int hrtick_enabled(struct rq *rq) +{ + if (!sched_feat(HRTICK)) + return 0; + return hrtimer_is_hres_active(&rq->hrtick_timer); +} + +/* + * Called to set the hrtick timer state. + * + * called with rq->lock held and irqs disabled + */ +static void hrtick_start(struct rq *rq, u64 delay, int reset) +{ + assert_spin_locked(&rq->lock); + + /* + * preempt at: now + delay + */ + rq->hrtick_expire = + ktime_add_ns(rq->hrtick_timer.base->get_time(), delay); + /* + * indicate we need to program the timer + */ + __set_bit(HRTICK_SET, &rq->hrtick_flags); + if (reset) + __set_bit(HRTICK_RESET, &rq->hrtick_flags); + + /* + * New slices are called from the schedule path and don't need a + * forced reschedule. + */ + if (reset) + resched_hrt(rq->curr); +} + +static void hrtick_clear(struct rq *rq) +{ + if (hrtimer_active(&rq->hrtick_timer)) + hrtimer_cancel(&rq->hrtick_timer); +} + +/* + * Update the timer from the possible pending state. + */ +static void hrtick_set(struct rq *rq) +{ + ktime_t time; + int set, reset; + unsigned long flags; + + WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); + + spin_lock_irqsave(&rq->lock, flags); + set = __test_and_clear_bit(HRTICK_SET, &rq->hrtick_flags); + reset = __test_and_clear_bit(HRTICK_RESET, &rq->hrtick_flags); + time = rq->hrtick_expire; + clear_thread_flag(TIF_HRTICK_RESCHED); + spin_unlock_irqrestore(&rq->lock, flags); + + if (set) { + hrtimer_start(&rq->hrtick_timer, time, HRTIMER_MODE_ABS); + if (reset && !hrtimer_active(&rq->hrtick_timer)) + resched_rq(rq); + } else + hrtick_clear(rq); +} + +/* + * High-resolution timer tick. + * Runs from hardirq context with interrupts disabled.
+ */ +static enum hrtimer_restart hrtick(struct hrtimer *timer) +{ + struct rq *rq = container_of(timer, struct rq, hrtick_timer); + + WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); + + spin_lock(&rq->lock); + __update_rq_clock(rq); + rq->curr->sched_class->task_tick(rq, rq->curr, 1); + spin_unlock(&rq->lock); + + return HRTIMER_NORESTART; +} + +static inline void init_rq_hrtick(struct rq *rq) +{ + rq->hrtick_flags = 0; + hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + rq->hrtick_timer.function = hrtick; + rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; +} + +void hrtick_resched(void) +{ + struct rq *rq; + unsigned long flags; + + if (!test_thread_flag(TIF_HRTICK_RESCHED)) + return; + + local_irq_save(flags); + rq = cpu_rq(smp_processor_id()); + hrtick_set(rq); + local_irq_restore(flags); +} +#else +static inline void hrtick_clear(struct rq *rq) +{ +} + +static inline void hrtick_set(struct rq *rq) +{ +} + +static inline void init_rq_hrtick(struct rq *rq) +{ +} + +void hrtick_resched(void) +{ +} +#endif + /* * resched_task - mark a task 'to be rescheduled now'. * @@ -809,16 +987,16 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) #endif -static void resched_task(struct task_struct *p) +static void __resched_task(struct task_struct *p, int tif_bit) { int cpu; assert_spin_locked(&task_rq(p)->lock); - if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED))) + if (unlikely(test_tsk_thread_flag(p, tif_bit))) return; - set_tsk_thread_flag(p, TIF_NEED_RESCHED); + set_tsk_thread_flag(p, tif_bit); cpu = task_cpu(p); if (cpu == smp_processor_id()) @@ -841,10 +1019,10 @@ static void resched_cpu(int cpu) spin_unlock_irqrestore(&rq->lock, flags); } #else -static inline void resched_task(struct task_struct *p) +static void __resched_task(struct task_struct *p, int tif_bit) { assert_spin_locked(&task_rq(p)->lock); - set_tsk_need_resched(p); + set_tsk_thread_flag(p, tif_bit); } #endif @@ -3497,7 +3675,7 @@ void scheduler_tick(void) rq->tick_timestamp = rq->clock; update_cpu_load(rq); if (curr != rq->idle) /* FIXME: needed? */ - curr->sched_class->task_tick(rq, curr); + curr->sched_class->task_tick(rq, curr, 0); spin_unlock(&rq->lock); #ifdef CONFIG_SMP @@ -3643,6 +3821,8 @@ need_resched_nonpreemptible: schedule_debug(prev); + hrtick_clear(rq); + /* * Do the rq-clock update outside the rq lock: */ @@ -3680,14 +3860,20 @@ need_resched_nonpreemptible: ++*switch_count; context_switch(rq, prev, next); /* unlocks the rq */ + /* + * the context switch might have flipped the stack from under + * us, hence refresh the local variables. 
+ */ + cpu = smp_processor_id(); + rq = cpu_rq(cpu); } else spin_unlock_irq(&rq->lock); - if (unlikely(reacquire_kernel_lock(current) < 0)) { - cpu = smp_processor_id(); - rq = cpu_rq(cpu); + hrtick_set(rq); + + if (unlikely(reacquire_kernel_lock(current) < 0)) goto need_resched_nonpreemptible; - } + preempt_enable_no_resched(); if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) goto need_resched; @@ -6913,6 +7099,8 @@ void __init sched_init(void) rq->rt.overloaded = 0; rq_attach_root(rq, &def_root_domain); #endif + init_rq_hrtick(rq); + atomic_set(&rq->nr_iowait, 0); array = &rq->rt.active; diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index dfa18d55561d..3dab1ff83c4f 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -642,13 +642,29 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) cfs_rq->curr = NULL; } -static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) +static void +entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) { /* * Update run-time statistics of the 'current'. */ update_curr(cfs_rq); +#ifdef CONFIG_SCHED_HRTICK + /* + * queued ticks are scheduled to match the slice, so don't bother + * validating it and just reschedule. + */ + if (queued) + return resched_task(rq_of(cfs_rq)->curr); + /* + * don't let the period tick interfere with the hrtick preemption + */ + if (!sched_feat(DOUBLE_TICK) && + hrtimer_active(&rq_of(cfs_rq)->hrtick_timer)) + return; +#endif + if (cfs_rq->nr_running > 1 || !sched_feat(WAKEUP_PREEMPT)) check_preempt_tick(cfs_rq, curr); } @@ -754,6 +770,43 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se) #endif /* CONFIG_FAIR_GROUP_SCHED */ +#ifdef CONFIG_SCHED_HRTICK +static void hrtick_start_fair(struct rq *rq, struct task_struct *p) +{ + int requeue = rq->curr == p; + struct sched_entity *se = &p->se; + struct cfs_rq *cfs_rq = cfs_rq_of(se); + + WARN_ON(task_rq(p) != rq); + + if (hrtick_enabled(rq) && cfs_rq->nr_running > 1) { + u64 slice = sched_slice(cfs_rq, se); + u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime; + s64 delta = slice - ran; + + if (delta < 0) { + if (rq->curr == p) + resched_task(p); + return; + } + + /* + * Don't schedule slices shorter than 10000ns, that just + * doesn't make sense. Rely on vruntime for fairness. + */ + if (!requeue) + delta = max(10000LL, delta); + + hrtick_start(rq, delta, requeue); + } +} +#else +static inline void +hrtick_start_fair(struct rq *rq, struct task_struct *p) +{ +} +#endif + /* * The enqueue_task method is called before nr_running is * increased. 
Here we update the fair scheduling stats and @@ -782,6 +835,8 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) */ if (incload) inc_cpu_load(rq, topse->load.weight); + + hrtick_start_fair(rq, rq->curr); } /* @@ -814,6 +869,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) */ if (decload) dec_cpu_load(rq, topse->load.weight); + + hrtick_start_fair(rq, rq->curr); } /* @@ -1049,6 +1106,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p) static struct task_struct *pick_next_task_fair(struct rq *rq) { + struct task_struct *p; struct cfs_rq *cfs_rq = &rq->cfs; struct sched_entity *se; @@ -1060,7 +1118,10 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) cfs_rq = group_cfs_rq(se); } while (cfs_rq); - return task_of(se); + p = task_of(se); + hrtick_start_fair(rq, p); + + return p; } /* @@ -1235,14 +1296,14 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, /* * scheduler tick hitting a task of our scheduling class: */ -static void task_tick_fair(struct rq *rq, struct task_struct *curr) +static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) { struct cfs_rq *cfs_rq; struct sched_entity *se = &curr->se; for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); - entity_tick(cfs_rq, se); + entity_tick(cfs_rq, se, queued); } } diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index ef7a2661fa10..2bcafa375633 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -61,7 +61,7 @@ move_one_task_idle(struct rq *this_rq, int this_cpu, struct rq *busiest, } #endif -static void task_tick_idle(struct rq *rq, struct task_struct *curr) +static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued) { } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index f350f7b15158..83fbbcb8019e 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -863,7 +863,7 @@ static void watchdog(struct rq *rq, struct task_struct *p) } } -static void task_tick_rt(struct rq *rq, struct task_struct *p) +static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) { update_curr_rt(rq); -- cgit v1.2.3 From 9745512ce79de686df354dc70a8d1a74d801892d Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 25 Jan 2008 21:08:34 +0100 Subject: sched: latencytop support LatencyTOP kernel infrastructure; it measures latencies in the scheduler and tracks them system-wide and per process.
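As a usage reference, a minimal userspace sketch (hypothetical, not part of this patch) that drives the interfaces added below; it assumes CONFIG_LATENCYTOP=y and uses the /proc/sys/kernel/latencytop sysctl and the /proc/latency_stats file this patch introduces:

/* lstats_dump.c - hypothetical helper, for illustration only */
#include <stdio.h>

int main(void)
{
	char line[512];
	FILE *f;

	/* enable collection via the sysctl added in kernel/sysctl.c */
	f = fopen("/proc/sys/kernel/latencytop", "w");
	if (!f) {
		perror("latencytop sysctl");
		return 1;
	}
	fputs("1\n", f);
	fclose(f);

	/* dump the system-wide records served by lstats_show() */
	f = fopen("/proc/latency_stats", "r");
	if (!f) {
		perror("/proc/latency_stats");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}

Per-task records are exposed in a similar format via /proc/<pid>/latency, and writing to either file clears the corresponding records.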
Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- arch/x86/kernel/stacktrace.c | 27 +++++ fs/proc/base.c | 78 ++++++++++++++ include/linux/latencytop.h | 44 ++++++++ include/linux/sched.h | 5 + include/linux/stacktrace.h | 3 + kernel/Makefile | 1 + kernel/fork.c | 1 + kernel/latencytop.c | 239 +++++++++++++++++++++++++++++++++++++++++++ kernel/sched_fair.c | 8 +- kernel/sysctl.c | 10 ++ lib/Kconfig.debug | 14 +++ 11 files changed, 429 insertions(+), 1 deletion(-) create mode 100644 include/linux/latencytop.h create mode 100644 kernel/latencytop.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 6fa6cf036c70..55771fd7e545 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -33,6 +33,19 @@ static void save_stack_address(void *data, unsigned long addr) trace->entries[trace->nr_entries++] = addr; } +static void save_stack_address_nosched(void *data, unsigned long addr) +{ + struct stack_trace *trace = (struct stack_trace *)data; + if (in_sched_functions(addr)) + return; + if (trace->skip > 0) { + trace->skip--; + return; + } + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = addr; +} + static const struct stacktrace_ops save_stack_ops = { .warning = save_stack_warning, .warning_symbol = save_stack_warning_symbol, @@ -40,6 +53,13 @@ static const struct stacktrace_ops save_stack_ops = { .address = save_stack_address, }; +static const struct stacktrace_ops save_stack_ops_nosched = { + .warning = save_stack_warning, + .warning_symbol = save_stack_warning_symbol, + .stack = save_stack_stack, + .address = save_stack_address_nosched, +}; + /* * Save stack-backtrace addresses into a stack_trace buffer. */ @@ -50,3 +70,10 @@ void save_stack_trace(struct stack_trace *trace) trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL(save_stack_trace); + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace); + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} diff --git a/fs/proc/base.c b/fs/proc/base.c index 7411bfb0b7cc..91fa8e6ce8ad 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -310,6 +310,77 @@ static int proc_pid_schedstat(struct task_struct *task, char *buffer) } #endif +#ifdef CONFIG_LATENCYTOP +static int lstats_show_proc(struct seq_file *m, void *v) +{ + int i; + struct task_struct *task = m->private; + seq_puts(m, "Latency Top version : v0.1\n"); + + for (i = 0; i < 32; i++) { + if (task->latency_record[i].backtrace[0]) { + int q; + seq_printf(m, "%i %li %li ", + task->latency_record[i].count, + task->latency_record[i].time, + task->latency_record[i].max); + for (q = 0; q < LT_BACKTRACEDEPTH; q++) { + char sym[KSYM_NAME_LEN]; + char *c; + if (!task->latency_record[i].backtrace[q]) + break; + if (task->latency_record[i].backtrace[q] == ULONG_MAX) + break; + sprint_symbol(sym, task->latency_record[i].backtrace[q]); + c = strchr(sym, '+'); + if (c) + *c = 0; + seq_printf(m, "%s ", sym); + } + seq_printf(m, "\n"); + } + + } + return 0; +} + +static int lstats_open(struct inode *inode, struct file *file) +{ + int ret; + struct seq_file *m; + struct task_struct *task = get_proc_task(inode); + + ret = single_open(file, lstats_show_proc, NULL); + if (!ret) { + m = file->private_data; + m->private = task; + } + return ret; +} + +static ssize_t lstats_write(struct file *file, const char __user *buf, + size_t count, loff_t 
*offs) +{ + struct seq_file *m; + struct task_struct *task; + + m = file->private_data; + task = m->private; + clear_all_latency_tracing(task); + + return count; +} + +static const struct file_operations proc_lstats_operations = { + .open = lstats_open, + .read = seq_read, + .write = lstats_write, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif + /* The badness from the OOM killer */ unsigned long badness(struct task_struct *p, unsigned long uptime); static int proc_oom_score(struct task_struct *task, char *buffer) @@ -1020,6 +1091,7 @@ static const struct file_operations proc_fault_inject_operations = { }; #endif + #ifdef CONFIG_SCHED_DEBUG /* * Print out various scheduling related per-task fields: @@ -2230,6 +2302,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_SCHEDSTATS INF("schedstat", S_IRUGO, pid_schedstat), #endif +#ifdef CONFIG_LATENCYTOP + REG("latency", S_IRUGO, lstats), +#endif #ifdef CONFIG_PROC_PID_CPUSET REG("cpuset", S_IRUGO, cpuset), #endif @@ -2555,6 +2630,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_SCHEDSTATS INF("schedstat", S_IRUGO, pid_schedstat), #endif +#ifdef CONFIG_LATENCYTOP + REG("latency", S_IRUGO, lstats), +#endif #ifdef CONFIG_PROC_PID_CPUSET REG("cpuset", S_IRUGO, cpuset), #endif diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h new file mode 100644 index 000000000000..901c2d6377a8 --- /dev/null +++ b/include/linux/latencytop.h @@ -0,0 +1,44 @@ +/* + * latencytop.h: Infrastructure for displaying latency + * + * (C) Copyright 2008 Intel Corporation + * Author: Arjan van de Ven + * + */ + +#ifndef _INCLUDE_GUARD_LATENCYTOP_H_ +#define _INCLUDE_GUARD_LATENCYTOP_H_ + +#ifdef CONFIG_LATENCYTOP + +#define LT_SAVECOUNT 32 +#define LT_BACKTRACEDEPTH 12 + +struct latency_record { + unsigned long backtrace[LT_BACKTRACEDEPTH]; + unsigned int count; + unsigned long time; + unsigned long max; +}; + + +struct task_struct; + +void account_scheduler_latency(struct task_struct *task, int usecs, int inter); + +void clear_all_latency_tracing(struct task_struct *p); + +#else + +static inline void +account_scheduler_latency(struct task_struct *task, int usecs, int inter) +{ +} + +static inline void clear_all_latency_tracing(struct task_struct *p) +{ +} + +#endif + +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index acadcab89ef9..dfc76e172f3f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -88,6 +88,7 @@ struct sched_param { #include #include #include +#include #include @@ -1220,6 +1221,10 @@ struct task_struct { int make_it_fail; #endif struct prop_local_single dirties; +#ifdef CONFIG_LATENCYTOP + int latency_record_count; + struct latency_record latency_record[LT_SAVECOUNT]; +#endif }; /* diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index e7fa657d0c49..5da9794b2d78 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -9,10 +9,13 @@ struct stack_trace { }; extern void save_stack_trace(struct stack_trace *trace); +extern void save_stack_trace_tsk(struct task_struct *tsk, + struct stack_trace *trace); extern void print_stack_trace(struct stack_trace *trace, int spaces); #else # define save_stack_trace(trace) do { } while (0) +# define save_stack_trace_tsk(tsk, trace) do { } while (0) # define print_stack_trace(trace, spaces) do { } while (0) #endif diff --git a/kernel/Makefile b/kernel/Makefile index 68755cd9a7e4..390d42146267 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -62,6 +62,7 @@ 
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o obj-$(CONFIG_MARKERS) += marker.o +obj-$(CONFIG_LATENCYTOP) += latencytop.o ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/kernel/fork.c b/kernel/fork.c index 0c969f4fade0..39d22b3357de 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1205,6 +1205,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif + clear_all_latency_tracing(p); /* Our parent execution domain becomes current domain These must match for thread signalling to apply */ diff --git a/kernel/latencytop.c b/kernel/latencytop.c new file mode 100644 index 000000000000..b4e3c85abe74 --- /dev/null +++ b/kernel/latencytop.c @@ -0,0 +1,239 @@ +/* + * latencytop.c: Latency display infrastructure + * + * (C) Copyright 2008 Intel Corporation + * Author: Arjan van de Ven + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_SPINLOCK(latency_lock); + +#define MAXLR 128 +static struct latency_record latency_record[MAXLR]; + +int latencytop_enabled; + +void clear_all_latency_tracing(struct task_struct *p) +{ + unsigned long flags; + + if (!latencytop_enabled) + return; + + spin_lock_irqsave(&latency_lock, flags); + memset(&p->latency_record, 0, sizeof(p->latency_record)); + p->latency_record_count = 0; + spin_unlock_irqrestore(&latency_lock, flags); +} + +static void clear_global_latency_tracing(void) +{ + unsigned long flags; + + spin_lock_irqsave(&latency_lock, flags); + memset(&latency_record, 0, sizeof(latency_record)); + spin_unlock_irqrestore(&latency_lock, flags); +} + +static void __sched +account_global_scheduler_latency(struct task_struct *tsk, struct latency_record *lat) +{ + int firstnonnull = MAXLR + 1; + int i; + + if (!latencytop_enabled) + return; + + /* skip kernel threads for now */ + if (!tsk->mm) + return; + + for (i = 0; i < MAXLR; i++) { + int q; + int same = 1; + /* Nothing stored: */ + if (!latency_record[i].backtrace[0]) { + if (firstnonnull > i) + firstnonnull = i; + continue; + } + for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) { + if (latency_record[i].backtrace[q] != + lat->backtrace[q]) + same = 0; + if (same && lat->backtrace[q] == 0) + break; + if (same && lat->backtrace[q] == ULONG_MAX) + break; + } + if (same) { + latency_record[i].count++; + latency_record[i].time += lat->time; + if (lat->time > latency_record[i].max) + latency_record[i].max = lat->time; + return; + } + } + + i = firstnonnull; + if (i >= MAXLR - 1) + return; + + /* Allocated a new one: */ + memcpy(&latency_record[i], lat, sizeof(struct latency_record)); +} + +static inline void store_stacktrace(struct task_struct *tsk, struct latency_record *lat) +{ + struct stack_trace trace; + + memset(&trace, 0, sizeof(trace)); + trace.max_entries = LT_BACKTRACEDEPTH; + trace.entries = &lat->backtrace[0]; + trace.skip = 0; + save_stack_trace_tsk(tsk, &trace); +} + +void __sched +account_scheduler_latency(struct task_struct *tsk, int usecs, int inter) +{ + unsigned long flags; + int i, q; + struct latency_record lat; + + if (!latencytop_enabled) + return; + + /* Long
interruptible waits are generally user requested... */ + if (inter && usecs > 5000) + return; + + memset(&lat, 0, sizeof(lat)); + lat.count = 1; + lat.time = usecs; + lat.max = usecs; + store_stacktrace(tsk, &lat); + + spin_lock_irqsave(&latency_lock, flags); + + account_global_scheduler_latency(tsk, &lat); + + /* + * short term hack; if we're > 32 we stop; future we recycle: + */ + tsk->latency_record_count++; + if (tsk->latency_record_count >= LT_SAVECOUNT) + goto out_unlock; + + for (i = 0; i < LT_SAVECOUNT ; i++) { + struct latency_record *mylat; + int same = 1; + mylat = &tsk->latency_record[i]; + for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) { + if (mylat->backtrace[q] != + lat.backtrace[q]) + same = 0; + if (same && lat.backtrace[q] == 0) + break; + if (same && lat.backtrace[q] == ULONG_MAX) + break; + } + if (same) { + mylat->count++; + mylat->time += lat.time; + if (lat.time > mylat->max) + mylat->max = lat.time; + goto out_unlock; + } + } + + /* Allocated a new one: */ + i = tsk->latency_record_count; + memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record)); + +out_unlock: + spin_unlock_irqrestore(&latency_lock, flags); +} + +static int lstats_show(struct seq_file *m, void *v) +{ + int i; + + seq_puts(m, "Latency Top version : v0.1\n"); + + for (i = 0; i < MAXLR; i++) { + if (latency_record[i].backtrace[0]) { + int q; + seq_printf(m, "%i %li %li ", + latency_record[i].count, + latency_record[i].time, + latency_record[i].max); + for (q = 0; q < LT_BACKTRACEDEPTH; q++) { + char sym[KSYM_NAME_LEN]; + char *c; + if (!latency_record[i].backtrace[q]) + break; + if (latency_record[i].backtrace[q] == ULONG_MAX) + break; + sprint_symbol(sym, latency_record[i].backtrace[q]); + c = strchr(sym, '+'); + if (c) + *c = 0; + seq_printf(m, "%s ", sym); + } + seq_printf(m, "\n"); + } + } + return 0; +} + +static ssize_t +lstats_write(struct file *file, const char __user *buf, size_t count, + loff_t *offs) +{ + clear_global_latency_tracing(); + + return count; +} + +static int lstats_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, lstats_show, NULL); +} + +static struct file_operations lstats_fops = { + .open = lstats_open, + .read = seq_read, + .write = lstats_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init init_lstats_procfs(void) +{ + struct proc_dir_entry *pe; + + pe = create_proc_entry("latency_stats", 0644, NULL); + if (!pe) + return -ENOMEM; + + pe->proc_fops = &lstats_fops; + + return 0; +} +__initcall(init_lstats_procfs); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 3dab1ff83c4f..1b3b40ad7c54 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -20,6 +20,8 @@ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra */ +#include + /* * Targeted preemption latency for CPU-bound tasks: * (default: 20ms * (1 + ilog(ncpus)), units: nanoseconds) @@ -434,6 +436,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) #ifdef CONFIG_SCHEDSTATS if (se->sleep_start) { u64 delta = rq_of(cfs_rq)->clock - se->sleep_start; + struct task_struct *tsk = task_of(se); if ((s64)delta < 0) delta = 0; @@ -443,9 +446,12 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) se->sleep_start = 0; se->sum_sleep_runtime += delta; + + account_scheduler_latency(tsk, delta >> 10, 1); } if (se->block_start) { u64 delta = rq_of(cfs_rq)->clock - se->block_start; + struct task_struct *tsk = task_of(se); if ((s64)delta < 0) delta = 0; @@ -462,11 +468,11 @@ static void 
enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) * time that the task spent sleeping: */ if (unlikely(prof_on == SLEEP_PROFILING)) { - struct task_struct *tsk = task_of(se); profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk), delta >> 20); } + account_scheduler_latency(tsk, delta >> 10, 0); } #endif } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3afbd25f43eb..5418ef61e16e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -81,6 +81,7 @@ extern int compat_log; extern int maps_protect; extern int sysctl_stat_interval; extern int audit_argv_kb; +extern int latencytop_enabled; /* Constants used for minimum and maximum */ #ifdef CONFIG_DETECT_SOFTLOCKUP @@ -416,6 +417,15 @@ static struct ctl_table kern_table[] = { .proc_handler = &proc_dointvec_taint, }, #endif +#ifdef CONFIG_LATENCYTOP + { + .procname = "latencytop", + .data = &latencytop_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif #ifdef CONFIG_SECURITY_CAPABILITIES { .procname = "cap-bound", diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a60109307d32..14fb355e3caa 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -517,4 +517,18 @@ config FAULT_INJECTION_STACKTRACE_FILTER help Provide stacktrace filter for fault-injection capabilities +config LATENCYTOP + bool "Latency measuring infrastructure" + select FRAME_POINTER if !MIPS + select KALLSYMS + select KALLSYMS_ALL + select STACKTRACE + select SCHEDSTATS + select SCHED_DEBUG + depends on X86 || X86_64 + help + Enable this option if you want to use the LatencyTOP tool + to find out which userspace is blocking on what kernel operations. + + source "samples/Kconfig" -- cgit v1.2.3 From 01ba2bdc6b639764745ff678caf3fb9e5bcd745a Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 20 Jan 2008 14:15:03 +0100 Subject: all archs: consolidate init and exit sections in vmlinux.lds.h This patch consolidates all .init.text, .init.data and .exit.text, .exit.data section definitions in the generic vmlinux.lds.h. This is a preparatory patch - alone it does not buy us much.
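For context, a minimal sketch (a hypothetical module, for illustration only) of the C-side convention these macros collect: __init/__exit annotations place code into the .init.text/.exit.text sections, which the new INIT_TEXT/EXIT_TEXT macros then gather, or discard, in each architecture's linker script:

/* example_mod.c - hypothetical, for illustration only */
#include <linux/init.h>
#include <linux/module.h>

/* lands in .init.text, collected by INIT_TEXT and freed after boot */
static int __init example_init(void)
{
	return 0;
}

/* lands in .exit.text; when built in, many linker scripts discard it
 * via EXIT_TEXT in the /DISCARD/ output section */
static void __exit example_exit(void)
{
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");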
Signed-off-by: Sam Ravnborg --- arch/alpha/kernel/vmlinux.lds.S | 8 ++++---- arch/arm/kernel/vmlinux.lds.S | 10 +++++----- arch/avr32/kernel/vmlinux.lds.S | 8 ++++---- arch/blackfin/kernel/vmlinux.lds.S | 8 ++++---- arch/cris/arch-v10/vmlinux.lds.S | 8 ++++---- arch/cris/arch-v32/vmlinux.lds.S | 8 ++++---- arch/frv/kernel/vmlinux.lds.S | 14 +++++++------- arch/h8300/kernel/vmlinux.lds.S | 8 ++++---- arch/ia64/kernel/vmlinux.lds.S | 8 ++++---- arch/m32r/kernel/vmlinux.lds.S | 12 ++++++------ arch/m68k/kernel/vmlinux-std.lds | 8 ++++---- arch/m68k/kernel/vmlinux-sun3.lds | 8 ++++---- arch/m68knommu/kernel/vmlinux.lds.S | 8 ++++---- arch/mips/kernel/vmlinux.lds.S | 8 ++++---- arch/parisc/kernel/vmlinux.lds.S | 8 ++++---- arch/powerpc/kernel/vmlinux.lds.S | 10 ++++++---- arch/ppc/kernel/vmlinux.lds.S | 8 ++++---- arch/s390/kernel/vmlinux.lds.S | 8 ++++---- arch/sh/kernel/vmlinux_32.lds.S | 8 ++++---- arch/sh/kernel/vmlinux_64.lds.S | 8 ++++---- arch/sparc/kernel/vmlinux.lds.S | 8 ++++---- arch/sparc64/kernel/vmlinux.lds.S | 8 ++++---- arch/um/kernel/dyn.lds.S | 4 ++-- arch/um/kernel/uml.lds.S | 4 ++-- arch/v850/kernel/vmlinux.lds.S | 10 +++++----- arch/x86/kernel/vmlinux_32.lds.S | 14 ++++++++++---- arch/x86/kernel/vmlinux_64.lds.S | 19 +++++++++++++------ arch/xtensa/kernel/vmlinux.lds.S | 9 +++++---- include/asm-generic/vmlinux.lds.h | 7 +++++++ 29 files changed, 140 insertions(+), 117 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 55c05b511f4c..f13249be17c5 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -46,11 +46,11 @@ SECTIONS __init_begin = .; .init.text : { _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; } .init.data : { - *(.init.data) + INIT_DATA } . = ALIGN(16); @@ -136,8 +136,8 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { - *(.exit.text) - *(.exit.data) + EXIT_TEXT + EXIT_DATA *(.exitcall.exit) } diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 30f732c7fdb5..4898bdcfe7dd 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -30,7 +30,7 @@ SECTIONS } .init : { /* Init code and data */ - *(.init.text) + INIT_TEXT _einittext = .; __proc_info_begin = .; *(.proc.info.init) @@ -70,15 +70,15 @@ SECTIONS __per_cpu_end = .; #ifndef CONFIG_XIP_KERNEL __init_begin = _stext; - *(.init.data) + INIT_DATA . = ALIGN(4096); __init_end = .; #endif } /DISCARD/ : { /* Exit code and data */ - *(.exit.text) - *(.exit.data) + EXIT_TEXT + EXIT_DATA *(.exitcall.exit) #ifndef CONFIG_MMU *(.fixup) @@ -130,7 +130,7 @@ SECTIONS #ifdef CONFIG_XIP_KERNEL . = ALIGN(4096); __init_begin = .; - *(.init.data) + INIT_DATA . = ALIGN(4096); __init_end = .; #endif diff --git a/arch/avr32/kernel/vmlinux.lds.S b/arch/avr32/kernel/vmlinux.lds.S index 11f08e35a2eb..481cfd40c053 100644 --- a/arch/avr32/kernel/vmlinux.lds.S +++ b/arch/avr32/kernel/vmlinux.lds.S @@ -27,19 +27,19 @@ SECTIONS __init_begin = .; _sinittext = .; *(.text.reset) - *(.init.text) + INIT_TEXT /* * .exit.text is discarded at runtime, not * link time, to deal with references from * __bug_table */ - *(.exit.text) + EXIT_TEXT _einittext = .; . = ALIGN(4); __tagtable_begin = .; *(.taglist.init) __tagtable_end = .; - *(.init.data) + INIT_DATA . = ALIGN(16); __setup_start = .; *(.init.setup) @@ -135,7 +135,7 @@ SECTIONS * thrown away, as cleanup code is never called unless it's a module. 
*/ /DISCARD/ : { - *(.exit.data) + EXIT_DATA *(.exitcall.exit) } diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index 9b75bc83c71f..858722421b40 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -91,13 +91,13 @@ SECTIONS { . = ALIGN(PAGE_SIZE); __sinittext = .; - *(.init.text) + INIT_TEXT __einittext = .; } .init.data : { . = ALIGN(16); - *(.init.data) + INIT_DATA } .init.setup : { @@ -198,8 +198,8 @@ SECTIONS /DISCARD/ : { - *(.exit.text) - *(.exit.data) + EXIT_TEXT + EXIT_DATA *(.exitcall.exit) } } diff --git a/arch/cris/arch-v10/vmlinux.lds.S b/arch/cris/arch-v10/vmlinux.lds.S index 97a7876ed681..93c9f0ea286b 100644 --- a/arch/cris/arch-v10/vmlinux.lds.S +++ b/arch/cris/arch-v10/vmlinux.lds.S @@ -57,10 +57,10 @@ SECTIONS __init_begin = .; .init.text : { _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; } - .init.data : { *(.init.data) } + .init.data : { INIT_DATA } . = ALIGN(16); __setup_start = .; .init.setup : { *(.init.setup) } @@ -109,8 +109,8 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { - *(.text.exit) - *(.data.exit) + EXIT_TEXT + EXIT_DATA *(.exitcall.exit) } diff --git a/arch/cris/arch-v32/vmlinux.lds.S b/arch/cris/arch-v32/vmlinux.lds.S index b076c134c0bb..fead8c59ea63 100644 --- a/arch/cris/arch-v32/vmlinux.lds.S +++ b/arch/cris/arch-v32/vmlinux.lds.S @@ -61,10 +61,10 @@ SECTIONS __init_begin = .; .init.text : { _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; } - .init.data : { *(.init.data) } + .init.data : { INIT_DATA } . = ALIGN(16); __setup_start = .; .init.setup : { *(.init.setup) } @@ -124,8 +124,8 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { - *(.text.exit) - *(.data.exit) + EXIT_TEXT + EXIT_DATA *(.exitcall.exit) } diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index a17a81d58bf6..f42b328b1dd0 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -28,14 +28,14 @@ SECTIONS .init.text : { *(.text.head) #ifndef CONFIG_DEBUG_INFO - *(.init.text) - *(.exit.text) - *(.exit.data) + INIT_TEXT + EXIT_TEXT + EXIT_DATA *(.exitcall.exit) #endif } _einittext = .; - .init.data : { *(.init.data) } + .init.data : { INIT_DATA } . = ALIGN(8); __setup_start = .; @@ -106,8 +106,8 @@ SECTIONS LOCK_TEXT #ifdef CONFIG_DEBUG_INFO *( - .init.text - .exit.text + INIT_TEXT + EXIT_TEXT .exitcall.exit ) #endif @@ -138,7 +138,7 @@ SECTIONS .data : { /* Data */ DATA_DATA *(.data.*) - *(.exit.data) + EXIT_DATA CONSTRUCTORS } diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index a2e72d495551..43a87b9085b6 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -110,9 +110,9 @@ SECTIONS . = ALIGN(0x4) ; ___init_begin = .; __sinittext = .; - *(.init.text) + INIT_TEXT __einittext = .; - *(.init.data) + INIT_DATA . = ALIGN(0x4) ; ___setup_start = .; *(.init.setup) @@ -124,8 +124,8 @@ SECTIONS ___con_initcall_start = .; *(.con_initcall.init) ___con_initcall_end = .; - *(.exit.text) - *(.exit.data) + EXIT_TEXT + EXIT_DATA #if defined(CONFIG_BLK_DEV_INITRD) . 
= ALIGN(4); ___initramfs_start = .; diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 757e419ebcf8..80622acc95de 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -27,8 +27,8 @@ SECTIONS { /* Sections to be discarded */ /DISCARD/ : { - *(.exit.text) - *(.exit.data) + EXIT_TEXT + EXIT_DATA *(.exitcall.exit) *(.IA_64.unwind.exit.text) *(.IA_64.unwind_info.exit.text) @@ -119,12 +119,12 @@ SECTIONS .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; } .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) - { *(.init.data) } + { INIT_DATA } #ifdef CONFIG_BLK_DEV_INITRD .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index 942a8c7a4417..41b07854fcc6 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -76,10 +76,10 @@ SECTIONS __init_begin = .; .init.text : { _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; } - .init.data : { *(.init.data) } + .init.data : { INIT_DATA } . = ALIGN(16); __setup_start = .; .init.setup : { *(.init.setup) } @@ -100,8 +100,8 @@ SECTIONS .altinstr_replacement : { *(.altinstr_replacement) } /* .exit.text is discard at runtime, not link time, to deal with references from .altinstructions and .eh_frame */ - .exit.text : { *(.exit.text) } - .exit.data : { *(.exit.data) } + .exit.text : { EXIT_TEXT } + .exit.data : { EXIT_DATA } #ifdef CONFIG_BLK_DEV_INITRD . = ALIGN(4096); @@ -124,8 +124,8 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { - *(.exit.text) - *(.exit.data) + EXIT_TEXT + EXIT_DATA *(.exitcall.exit) } diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index 59fe285865ec..7537cc5e6159 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -45,10 +45,10 @@ SECTIONS __init_begin = .; .init.text : { _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; } - .init.data : { *(.init.data) } + .init.data : { INIT_DATA } . = ALIGN(16); __setup_start = .; .init.setup : { *(.init.setup) } @@ -82,8 +82,8 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { - *(.exit.text) - *(.exit.data) + EXIT_TEXT + EXIT_DATA *(.exitcall.exit) } diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index 4adffefb5c48..cdc313e7c299 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds @@ -38,10 +38,10 @@ SECTIONS __init_begin = .; .init.text : { _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; } - .init.data : { *(.init.data) } + .init.data : { INIT_DATA } . = ALIGN(16); __setup_start = .; .init.setup : { *(.init.setup) } @@ -77,8 +77,8 @@ __init_begin = .; /* Sections to be discarded */ /DISCARD/ : { - *(.exit.text) - *(.exit.data) + EXIT_TEXT + EXIT_DATA *(.exitcall.exit) } diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68knommu/kernel/vmlinux.lds.S index 07a0055602f4..b44edb08e212 100644 --- a/arch/m68knommu/kernel/vmlinux.lds.S +++ b/arch/m68knommu/kernel/vmlinux.lds.S @@ -143,9 +143,9 @@ SECTIONS { . = ALIGN(4096); __init_begin = .; _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; - *(.init.data) + INIT_DATA . 
= ALIGN(16); __setup_start = .; *(.init.setup) @@ -170,8 +170,8 @@ SECTIONS { } > INIT /DISCARD/ : { - *(.exit.text) - *(.exit.data) + EXIT_TEXT + EXIT_DATA *(.exitcall.exit) } diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 5fc2398bdb76..b5470ceb418b 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -114,11 +114,11 @@ SECTIONS __init_begin = .; .init.text : { _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; } .init.data : { - *(.init.data) + INIT_DATA } . = ALIGN(16); .init.setup : { @@ -144,10 +144,10 @@ SECTIONS * references from .rodata */ .exit.text : { - *(.exit.text) + EXIT_TEXT } .exit.data : { - *(.exit.data) + EXIT_DATA } #if defined(CONFIG_BLK_DEV_INITRD) . = ALIGN(_PAGE_SIZE); diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 40d0ff9b81ab..50b4a3a25d0a 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -172,11 +172,11 @@ SECTIONS __init_begin = .; .init.text : { _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; } .init.data : { - *(.init.data) + INIT_DATA } . = ALIGN(16); .init.setup : { @@ -215,10 +215,10 @@ SECTIONS * from .altinstructions and .eh_frame */ .exit.text : { - *(.exit.text) + EXIT_TEXT } .exit.data : { - *(.exit.data) + EXIT_DATA } #ifdef CONFIG_BLK_DEV_INITRD . = ALIGN(PAGE_SIZE); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index f66fa5d966b0..0afb9e31d2a0 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -23,7 +23,7 @@ SECTIONS /* Sections to be discarded. */ /DISCARD/ : { *(.exitcall.exit) - *(.exit.data) + EXIT_DATA } . = KERNELBASE; @@ -76,17 +76,19 @@ SECTIONS .init.text : { _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; } /* .exit.text is discarded at runtime, not link time, * to deal with references from __bug_table */ - .exit.text : { *(.exit.text) } + .exit.text : { + EXIT_TEXT + } .init.data : { - *(.init.data); + INIT_DATA __vtop_table_begin = .; *(.vtop_fixup); __vtop_table_end = .; diff --git a/arch/ppc/kernel/vmlinux.lds.S b/arch/ppc/kernel/vmlinux.lds.S index 98c1212674f6..52b64fcbdfc5 100644 --- a/arch/ppc/kernel/vmlinux.lds.S +++ b/arch/ppc/kernel/vmlinux.lds.S @@ -97,14 +97,14 @@ SECTIONS __init_begin = .; .init.text : { _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; } /* .exit.text is discarded at runtime, not link time, to deal with references from __bug_table */ - .exit.text : { *(.exit.text) } + .exit.text : { EXIT_TEXT } .init.data : { - *(.init.data); + INIT_DATA __vtop_table_begin = .; *(.vtop_fixup); __vtop_table_end = .; @@ -164,6 +164,6 @@ SECTIONS /* Sections to be discarded. */ /DISCARD/ : { *(.exitcall.exit) - *(.exit.data) + EXIT_DATA } } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 936159199346..7d43c3cd3ef3 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -97,7 +97,7 @@ SECTIONS __init_begin = .; .init.text : { _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; } /* @@ -105,11 +105,11 @@ SECTIONS * to deal with references from __bug_table */ .exit.text : { - *(.exit.text) + EXIT_TEXT } .init.data : { - *(.init.data) + INIT_DATA } . 
= ALIGN(0x100); .init.setup : { @@ -156,7 +156,7 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { - *(.exit.data) + EXIT_DATA *(.exitcall.exit) } diff --git a/arch/sh/kernel/vmlinux_32.lds.S b/arch/sh/kernel/vmlinux_32.lds.S index d549fac6d3e7..c7113786ecd4 100644 --- a/arch/sh/kernel/vmlinux_32.lds.S +++ b/arch/sh/kernel/vmlinux_32.lds.S @@ -84,9 +84,9 @@ SECTIONS . = ALIGN(PAGE_SIZE); /* Init code and data */ __init_begin = .; _sinittext = .; - .init.text : { *(.init.text) } + .init.text : { INIT_TEXT } _einittext = .; - .init.data : { *(.init.data) } + .init.data : { INIT_DATA } . = ALIGN(16); __setup_start = .; @@ -122,8 +122,8 @@ SECTIONS * .exit.text is discarded at runtime, not link time, to deal with * references from __bug_table */ - .exit.text : { *(.exit.text) } - .exit.data : { *(.exit.data) } + .exit.text : { EXIT_TEXT } + .exit.data : { EXIT_DATA } . = ALIGN(PAGE_SIZE); .bss : { diff --git a/arch/sh/kernel/vmlinux_64.lds.S b/arch/sh/kernel/vmlinux_64.lds.S index 2fd0f7401484..3f1bd6392bb3 100644 --- a/arch/sh/kernel/vmlinux_64.lds.S +++ b/arch/sh/kernel/vmlinux_64.lds.S @@ -96,9 +96,9 @@ SECTIONS . = ALIGN(PAGE_SIZE); /* Init code and data */ __init_begin = .; _sinittext = .; - .init.text : C_PHYS(.init.text) { *(.init.text) } + .init.text : C_PHYS(.init.text) { INIT_TEXT } _einittext = .; - .init.data : C_PHYS(.init.data) { *(.init.data) } + .init.data : C_PHYS(.init.data) { INIT_DATA } . = ALIGN(L1_CACHE_BYTES); /* Better if Cache Line aligned */ __setup_start = .; .init.setup : C_PHYS(.init.setup) { *(.init.setup) } @@ -134,8 +134,8 @@ SECTIONS * .exit.text is discarded at runtime, not link time, to deal with * references from __bug_table */ - .exit.text : C_PHYS(.exit.text) { *(.exit.text) } - .exit.data : C_PHYS(.exit.data) { *(.exit.data) } + .exit.text : C_PHYS(.exit.text) { EXIT_TEXT } + .exit.data : C_PHYS(.exit.data) { EXIT_DATA } . = ALIGN(PAGE_SIZE); .bss : C_PHYS(.bss) { diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index a8b4200f9cc3..216147d6e61f 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -48,12 +48,12 @@ SECTIONS __init_begin = .; .init.text : { _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; } __init_text_end = .; .init.data : { - *(.init.data) + INIT_DATA } . = ALIGN(16); .init.setup : { @@ -102,8 +102,8 @@ SECTIONS _end = . ; PROVIDE (end = .); /DISCARD/ : { - *(.exit.text) - *(.exit.data) + EXIT_TEXT + EXIT_DATA *(.exitcall.exit) } diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S index 9fcd503bc04a..01f809617e5e 100644 --- a/arch/sparc64/kernel/vmlinux.lds.S +++ b/arch/sparc64/kernel/vmlinux.lds.S @@ -56,11 +56,11 @@ SECTIONS .init.text : { __init_begin = .; _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; } .init.data : { - *(.init.data) + INIT_DATA } . = ALIGN(16); .init.setup : { @@ -137,8 +137,8 @@ SECTIONS PROVIDE (end = .); /DISCARD/ : { - *(.exit.text) - *(.exit.data) + EXIT_TEXT + EXIT_DATA *(.exitcall.exit) } diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 3866f4960f04..26090b7f323e 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -17,7 +17,7 @@ SECTIONS __init_begin = .; .init.text : { _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; } @@ -84,7 +84,7 @@ SECTIONS #include "asm/common.lds.S" - init.data : { *(.init.data) } + init.data : { INIT_DATA } /* Ensure the __preinit_array_start label is properly aligned. 
We could instead move the label definition inside the section, but diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 13df191e2b41..5828c1d54505 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -23,7 +23,7 @@ SECTIONS __init_begin = .; .init.text : { _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; } . = ALIGN(4096); @@ -48,7 +48,7 @@ SECTIONS #include "asm/common.lds.S" - init.data : { *(init.data) } + init.data : { INIT_DATA } .data : { . = ALIGN(KERNEL_STACK_SIZE); /* init_task */ diff --git a/arch/v850/kernel/vmlinux.lds.S b/arch/v850/kernel/vmlinux.lds.S index 6172599b4ce2..d08cd1d27f27 100644 --- a/arch/v850/kernel/vmlinux.lds.S +++ b/arch/v850/kernel/vmlinux.lds.S @@ -114,7 +114,7 @@ #define DATA_CONTENTS \ __sdata = . ; \ DATA_DATA \ - *(.exit.data) /* 2.5 convention */ \ + EXIT_DATA /* 2.5 convention */ \ *(.data.exit) /* 2.4 convention */ \ . = ALIGN (16) ; \ *(.data.cacheline_aligned) \ @@ -157,9 +157,9 @@ . = ALIGN (4096) ; \ __init_start = . ; \ __sinittext = .; \ - *(.init.text) /* 2.5 convention */ \ + INIT_TEXT /* 2.5 convention */ \ __einittext = .; \ - *(.init.data) \ + INIT_DATA \ *(.text.init) /* 2.4 convention */ \ *(.data.init) \ INITCALL_CONTENTS \ @@ -170,7 +170,7 @@ #define ROMK_INIT_RAM_CONTENTS \ . = ALIGN (4096) ; \ __init_start = . ; \ - *(.init.data) /* 2.5 convention */ \ + INIT_DATA /* 2.5 convention */ \ *(.data.init) /* 2.4 convention */ \ __init_end = . ; \ . = ALIGN (4096) ; @@ -179,7 +179,7 @@ should go into ROM. */ #define ROMK_INIT_ROM_CONTENTS \ _sinittext = .; \ - *(.init.text) /* 2.5 convention */ \ + INIT_TEXT /* 2.5 convention */ \ _einittext = .; \ *(.text.init) /* 2.4 convention */ \ INITCALL_CONTENTS \ diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 7d72cce00529..84c913f38f98 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -131,10 +131,12 @@ SECTIONS .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { __init_begin = .; _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; } - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) } + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { + INIT_DATA + } . = ALIGN(16); .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { __setup_start = .; @@ -169,8 +171,12 @@ SECTIONS } /* .exit.text is discard at runtime, not link time, to deal with references from .altinstructions and .eh_frame */ - .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } - .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } + .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { + EXIT_TEXT + } + .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { + EXIT_DATA + } #if defined(CONFIG_BLK_DEV_INITRD) . = ALIGN(4096); .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index ba8ea97abd21..ea5386944e67 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -155,12 +155,15 @@ SECTIONS __init_begin = .; .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; - *(.init.text) + INIT_TEXT _einittext = .; } - __initdata_begin = .; - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) } - __initdata_end = .; + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { + __initdata_begin = .; + INIT_DATA + __initdata_end = .; + } + . 
= ALIGN(16); __setup_start = .; .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) } @@ -187,8 +190,12 @@ SECTIONS } /* .exit.text is discard at runtime, not link time, to deal with references from .altinstructions and .eh_frame */ - .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } - .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } + .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { + EXIT_TEXT + } + .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { + EXIT_DATA + } /* vdso blob that is mapped into user space */ vdso_start = . ; diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index ac4ed52034db..7d0f55a4982d 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -136,13 +136,13 @@ SECTIONS __init_begin = .; .init.text : { _sinittext = .; - *(.init.literal) *(.init.text) + *(.init.literal) INIT_TEXT _einittext = .; } .init.data : { - *(.init.data) + INIT_DATA . = ALIGN(0x4); __tagtable_begin = .; *(.taglist) @@ -278,8 +278,9 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { - *(.exit.literal .exit.text) - *(.exit.data) + *(.exit.literal) + EXIT_TEXT + EXIT_DATA *(.exitcall.exit) } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 9f584cc5c5fb..ae0166e83490 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -183,6 +183,13 @@ *(.kprobes.text) \ VMLINUX_SYMBOL(__kprobes_text_end) = .; +/* init and exit section handling */ +#define INIT_TEXT *(.init.text) +#define INIT_DATA *(.init.data) +#define EXIT_TEXT *(.exit.text) +#define EXIT_DATA *(.exit.data) + + /* DWARF debug sections. Symbols in the DWARF debugging sections are relative to the beginning of the section so we begin them at 0. */ -- cgit v1.2.3 From 213eca7f4888e9817e8076cdab6b9f7295c181f6 Mon Sep 17 00:00:00 2001 From: Greg KH Date: Wed, 30 Jan 2008 13:29:58 +0100 Subject: kobj: fix threshold_init_device/kobject_uevent_env oops The logic in this function is just crazy. It's recursive, but we can circumvent the creation of the kobject and the whole creation of the threshold_block if some conditions are met. That's why we see the allocate_threshold_blocks so many times in the callstack, yet only a few kobjects created. Then we blow up in kobject_uevent_env() on the first debug printk, which means that we are just passing in garbage. Man, this is one time that comments in code would have been very nice to have, and why forward gotos into major code blocks are just evil... Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 753588755fee..073afa7dd89a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -450,7 +450,8 @@ recurse: if (err) goto out_free; - kobject_uevent(&b->kobj, KOBJ_ADD); + if (b) + kobject_uevent(&b->kobj, KOBJ_ADD); return err; -- cgit v1.2.3 From b10db7f0d2b589a7f88dc3026e150756cb437a28 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 30 Jan 2008 13:30:00 +0100 Subject: time: more timer related cleanups I was confused by the FSEC = 10^15 NSEC statement, plus small whitespace fixes. When there's copyright, there should be GPL.
Signed-off-by: Pavel Machek Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 3 ++- arch/x86/kernel/process_64.c | 2 +- kernel/softirq.c | 4 +++- kernel/time/tick-sched.c | 2 +- kernel/time/timer_stats.c | 2 +- 5 files changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 2f99ee206b95..9ec2ab793042 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -16,7 +16,8 @@ #define HPET_MASK CLOCKSOURCE_MASK(32) #define HPET_SHIFT 22 -/* FSEC = 10^-15 NSEC = 10^-9 */ +/* FSEC = 10^-15 + NSEC = 10^-9 */ #define FSEC_PER_NSEC 1000000 /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ab79e1dfa023..c2db7ef93565 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -207,7 +207,7 @@ static inline void play_dead(void) * low exit latency (ie sit in a loop waiting for * somebody to say that they'd like to reschedule) */ -void cpu_idle (void) +void cpu_idle(void) { current_thread_info()->status |= TS_POLLING; /* endless idle loop with no priority at all */ diff --git a/kernel/softirq.c b/kernel/softirq.c index bd89bc4eb0b9..8fe1ff40102d 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -3,7 +3,9 @@ * * Copyright (C) 1992 Linus Torvalds * - * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903) + * Distribute under GPLv2. + * + * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903) */ #include diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d36ee2fd1a3b..49e12f6a4bab 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -9,7 +9,7 @@ * * Started by: Thomas Gleixner and Ingo Molnar * - * For licencing details see kernel-base/COPYING + * Distribute under GPLv2. */ #include #include diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index c36bb7ed0301..417da8c5bc72 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c @@ -26,7 +26,7 @@ * the pid and cmdline from the owner process if applicable. * * Start/stop data collection: - * # echo 1[0] >/proc/timer_stats + * # echo [1|0] >/proc/timer_stats * * Display the information collected so far: * # cat /proc/timer_stats -- cgit v1.2.3 From 316da3b3fc8efa9a5d2c99e0d449f01ff38c6aba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:02 +0100 Subject: x86: restrict PIT clocksource usage PIT clocksource is registered unconditionally even when HPET is enabled or when PIT is replaced by the local APIC timer. In both cases PIT can not be used as it is stopped and the readout would be stale. Prevent registering PIT in those cases. patch depends on: x86: offer is_hpet_enabled() on !CONFIG_HPET_TIMER too Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/i8253.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index a42c80745325..0f8f35458a8f 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -197,7 +197,15 @@ static struct clocksource clocksource_pit = { static int __init init_pit_clocksource(void) { - if (num_possible_cpus() > 1) /* PIT does not scale! 
*/ + /* + * Several reasons not to register PIT as a clocksource: + * + * - On SMP PIT does not scale due to i8253_lock + * - when HPET is enabled + * - when local APIC timer is active (PIT is switched off) + */ + if (num_possible_cpus() > 1 || is_hpet_enabled() || + pit_clockevent.mode != CLOCK_EVT_MODE_PERIODIC) return 0; clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); -- cgit v1.2.3 From 4713e22ce81eb8b3353e16435362eb3d0ec95640 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:02 +0100 Subject: clocksource: add unregister function to disable unusable clocksources On x86 the PIT might become an unusable clocksource. Add an unregister function to provide a possibilty to remove the PIT from the list of available clock sources. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/i8253.c | 1 + include/linux/clocksource.h | 1 + kernel/time/clocksource.c | 15 +++++++++++++++ 3 files changed, 17 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 0f8f35458a8f..decc5d294d76 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -13,6 +13,7 @@ #include #include #include +#include DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 07b42153de24..85778a4b1209 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -215,6 +215,7 @@ static inline void clocksource_calculate_interval(struct clocksource *c, /* used to install a new clocksource */ extern int clocksource_register(struct clocksource*); +extern void clocksource_unregister(struct clocksource*); extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_resume(void); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index edd5ef8e1765..6e9259a5d501 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -337,6 +337,21 @@ void clocksource_change_rating(struct clocksource *cs, int rating) spin_unlock_irqrestore(&clocksource_lock, flags); } +/** + * clocksource_unregister - remove a registered clocksource + */ +void clocksource_unregister(struct clocksource *cs) +{ + unsigned long flags; + + spin_lock_irqsave(&clocksource_lock, flags); + list_del(&cs->list); + if (clocksource_override == cs) + clocksource_override = NULL; + next_clocksource = select_clocksource(); + spin_unlock_irqrestore(&clocksource_lock, flags); +} + #ifdef CONFIG_SYSFS /** * sysfs_show_current_clocksources - sysfs interface for current clocksource -- cgit v1.2.3 From 1a0c009ac53de4a7664a1239936f0bc258133156 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:03 +0100 Subject: x86: unregister PIT clocksource when PIT is disabled The following scenario might leave PIT as a disfunctional clock source: PIT is registered as clocksource PM_TIMER is registered as clocksource and enables highres/dyntick mode PIT is switched to oneshot mode -> now the readout of PIT is bogus, but the user might select PIT via the sysfs override, which would break the box as the time readout is unusable. Unregister the PIT clocksource when the PIT clock event device is switched into shutdown / oneshot mode. 
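The mechanics of the unregister path are simply: take the clocksource off the list, forget any override that pointed at it, and re-run best-rating selection. A self-contained sketch of that idea (names and ratings are illustrative; the kernel version additionally holds clocksource_lock and clears the sysfs override):

#include <stdio.h>
#include <stddef.h>

struct cs {
	const char *name;
	int rating;
	struct cs *next;
};

static struct cs *head;

static void cs_register(struct cs *c)
{
	c->next = head;
	head = c;
}

static void cs_unregister(struct cs *c)
{
	struct cs **p;

	for (p = &head; *p; p = &(*p)->next) {
		if (*p == c) {
			*p = c->next;		/* list_del() equivalent */
			break;
		}
	}
}

static struct cs *select_best(void)
{
	struct cs *c, *best = NULL;

	for (c = head; c; c = c->next)
		if (!best || c->rating > best->rating)
			best = c;
	return best;
}

int main(void)
{
	struct cs pit = { "pit", 110, NULL };
	struct cs hpet = { "hpet", 250, NULL };

	cs_register(&pit);
	cs_register(&hpet);
	printf("selected: %s\n", select_best()->name);	/* hpet */

	cs_unregister(&hpet);		/* drop a source that became unusable */
	printf("selected: %s\n", select_best()->name);	/* back to pit */
	return 0;
}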
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/i8253.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index decc5d294d76..377c3f8411f8 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -18,6 +18,12 @@ DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); +#ifdef CONFIG_X86_32 +static void pit_disable_clocksource(void); +#else +static inline void pit_disable_clocksource(void) { } +#endif + /* * HPET replaces the PIT, when enabled. So we need to know, which of * the two timers is used @@ -52,11 +58,13 @@ static void init_pit_timer(enum clock_event_mode mode, outb_p(0, PIT_CH0); outb_p(0, PIT_CH0); } + pit_disable_clocksource(); break; case CLOCK_EVT_MODE_ONESHOT: /* One shot setup */ outb_p(0x38, PIT_MODE); + pit_disable_clocksource(); break; case CLOCK_EVT_MODE_RESUME: @@ -196,6 +204,17 @@ static struct clocksource clocksource_pit = { .shift = 20, }; +static void pit_disable_clocksource(void) +{ + /* + * Use mult to check whether it is registered or not + */ + if (clocksource_pit.mult) { + clocksource_unregister(&clocksource_pit); + clocksource_pit.mult = 0; + } +} + static int __init init_pit_clocksource(void) { /* -- cgit v1.2.3 From e3f37a54f690d3e64995ea7ecea08c5ab3070faf Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Wed, 30 Jan 2008 13:30:03 +0100 Subject: x86: assign IRQs to HPET timers The userspace API for the HPET (see Documentation/hpet.txt) did not work. The HPET_IE_ON ioctl was failing as there was no IRQ assigned to the timer device. This patch fixes it by allocating IRQs to timer blocks in the HPET. arch/x86/kernel/hpet.c | 13 +++++-------- drivers/char/hpet.c | 45 ++++++++++++++++++++++++++++++++++++++------- include/linux/hpet.h | 2 +- 3 files changed, 44 insertions(+), 16 deletions(-) Signed-off-by: Balaji Rao Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 13 +++++-------- drivers/char/hpet.c | 45 ++++++++++++++++++++++++++++++++++++++------- include/linux/hpet.h | 2 +- 3 files changed, 44 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 9ec2ab793042..786aa227afdf 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -117,8 +117,7 @@ int is_hpet_enabled(void) static void hpet_reserve_platform_timers(unsigned long id) { struct hpet __iomem *hpet = hpet_virt_address; - struct hpet_timer __iomem *timer = &hpet->hpet_timers[2]; - unsigned int nrtimers, i; + unsigned int nrtimers; struct hpet_data hd; nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; @@ -133,16 +132,14 @@ static void hpet_reserve_platform_timers(unsigned long id) #ifdef CONFIG_HPET_EMULATE_RTC hpet_reserve_timer(&hd, 1); #endif - hd.hd_irq[0] = HPET_LEGACY_8254; hd.hd_irq[1] = HPET_LEGACY_RTC; - for (i = 2; i < nrtimers; timer++, i++) - hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >> - Tn_INT_ROUTE_CNF_SHIFT; - + /* + * IRQs for the other timers are assigned dynamically + * in hpet_alloc + */ hpet_alloc(&hd); - } #else static void hpet_reserve_platform_timers(unsigned long id) { } diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 4c16778e3f84..593b32cfbc33 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -806,14 +806,14 @@ static unsigned long hpet_calibrate(struct hpets *hpetp) int hpet_alloc(struct hpet_data *hdp) { - u64 cap, mcfg; + u64 cap, mcfg, hpet_config; struct 
hpet_dev *devp; - u32 i, ntimer; + u32 i, ntimer, irq; struct hpets *hpetp; size_t siz; struct hpet __iomem *hpet; static struct hpets *last = NULL; - unsigned long period; + unsigned long period, irq_bitmap; unsigned long long temp; /* @@ -840,11 +840,41 @@ int hpet_alloc(struct hpet_data *hdp) hpetp->hp_hpet_phys = hdp->hd_phys_address; hpetp->hp_ntimer = hdp->hd_nirqs; + hpet = hpetp->hp_hpet; - for (i = 0; i < hdp->hd_nirqs; i++) - hpetp->hp_dev[i].hd_hdwirq = hdp->hd_irq[i]; + /* Assign IRQs statically for legacy devices */ + hpetp->hp_dev[0].hd_hdwirq = hdp->hd_irq[0]; + hpetp->hp_dev[1].hd_hdwirq = hdp->hd_irq[1]; - hpet = hpetp->hp_hpet; + /* Assign IRQs dynamically for the others */ + for (i = 2, devp = &hpetp->hp_dev[2]; i < hdp->hd_nirqs; i++, devp++) { + struct hpet_timer __iomem *timer; + + timer = &hpet->hpet_timers[devp - hpetp->hp_dev]; + + hpet_config = readq(&timer->hpet_config); + irq_bitmap = (hpet_config & Tn_INT_ROUTE_CAP_MASK) + >> Tn_INT_ROUTE_CAP_SHIFT; + if (!irq_bitmap) + irq = 0; /* No valid IRQ Assignable */ + else { + irq = find_first_bit(&irq_bitmap, 32); + do { + hpet_config |= irq << Tn_INT_ROUTE_CNF_SHIFT; + writeq(hpet_config, &timer->hpet_config); + + /* + * Verify whether we have written a valid + * IRQ number by reading it back again + */ + hpet_config = readq(&timer->hpet_config); + if (irq == (hpet_config & Tn_INT_ROUTE_CNF_MASK) + >> Tn_INT_ROUTE_CNF_SHIFT) + break; /* Success */ + } while ((irq = (find_next_bit(&irq_bitmap, 32, irq)))); + } + hpetp->hp_dev[i].hd_hdwirq = irq; + } cap = readq(&hpet->hpet_cap); @@ -875,7 +905,8 @@ int hpet_alloc(struct hpet_data *hdp) hpetp->hp_which, hdp->hd_phys_address, hpetp->hp_ntimer > 1 ? "s" : ""); for (i = 0; i < hpetp->hp_ntimer; i++) - printk("%s %d", i > 0 ? "," : "", hdp->hd_irq[i]); + printk("%s %d", i > 0 ? "," : "", + hpetp->hp_dev[i].hd_hdwirq); printk("\n"); printk(KERN_INFO "hpet%u: %u %d-bit timers, %Lu Hz\n", diff --git a/include/linux/hpet.h b/include/linux/hpet.h index 707f7cb9e795..e3c0b2aa944c 100644 --- a/include/linux/hpet.h +++ b/include/linux/hpet.h @@ -64,7 +64,7 @@ struct hpet { */ #define Tn_INT_ROUTE_CAP_MASK (0xffffffff00000000ULL) -#define Tn_INI_ROUTE_CAP_SHIFT (32UL) +#define Tn_INT_ROUTE_CAP_SHIFT (32UL) #define Tn_FSB_INT_DELCAP_MASK (0x8000UL) #define Tn_FSB_INT_DELCAP_SHIFT (15) #define Tn_FSB_EN_CNF_MASK (0x4000UL) -- cgit v1.2.3 From 37a47db8d7f0f38dac5acf5a13abbc8f401707fa Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Wed, 30 Jan 2008 13:30:03 +0100 Subject: x86: assign IRQs to HPET timers, fix Looks like IRQ 31 is assigned to timer 3, even without the patch! I wonder who wrote the number 31. But the manual says that it is zero by default. I think we should check whether the timer has been allocated an IRQ before proceeding to assign one to it. Here is a patch that does this. 
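The essential trick in the hpet_alloc() change is the write-then-read-back verification: the idea is that Tn_INT_ROUTE_CNF will not hold a routing the timer does not support, so a readback mismatch means "try the next capability bit". A userspace simulation of that loop, where the "hardware" is just a variable that accepts a made-up subset of the capability bits:

#include <stdio.h>
#include <stdint.h>

static uint32_t hw_accepts = (1u << 20) | (1u << 21);	/* hypothetical */
static uint32_t route_cnf;		/* models Tn_INT_ROUTE_CNF */

static void write_cnf(uint32_t irq)
{
	/* real hardware keeps the value only for supported routings */
	if (hw_accepts & (1u << irq))
		route_cnf = irq;
}

int main(void)
{
	uint32_t route_cap = (1u << 19) | (1u << 20) | (1u << 21);
	uint32_t irq;

	/* IRQ 0 means "unassigned", so start at 1 */
	for (irq = 1; irq < 32; irq++) {
		if (!(route_cap & (1u << irq)))
			continue;
		write_cnf(irq);
		if (route_cnf == irq) {		/* read back and verify */
			printf("assigned IRQ %u\n", irq);
			return 0;
		}
	}
	printf("no valid IRQ assignable\n");
	return 1;
}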
Signed-off-by: Balaji Rao Tested-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 10 +++++----- drivers/char/hpet.c | 6 ++++++ 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 786aa227afdf..a3c56c9b8a02 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -117,7 +117,8 @@ int is_hpet_enabled(void) static void hpet_reserve_platform_timers(unsigned long id) { struct hpet __iomem *hpet = hpet_virt_address; - unsigned int nrtimers; + struct hpet_timer __iomem *timer = &hpet->hpet_timers[2]; + unsigned int nrtimers, i; struct hpet_data hd; nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; @@ -135,10 +136,9 @@ static void hpet_reserve_platform_timers(unsigned long id) hd.hd_irq[0] = HPET_LEGACY_8254; hd.hd_irq[1] = HPET_LEGACY_RTC; - /* - * IRQs for the other timers are assigned dynamically - * in hpet_alloc - */ + for (i = 2; i < nrtimers; timer++, i++) + hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >> + Tn_INT_ROUTE_CNF_SHIFT; hpet_alloc(&hd); } #else diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 593b32cfbc33..22f5fd02ea87 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -852,6 +852,12 @@ int hpet_alloc(struct hpet_data *hdp) timer = &hpet->hpet_timers[devp - hpetp->hp_dev]; + /* Check if there's already an IRQ assigned to the timer */ + if (hdp->hd_irq[i]) { + hpetp->hp_dev[i].hd_hdwirq = hdp->hd_irq[i]; + continue; + } + hpet_config = readq(&timer->hpet_config); irq_bitmap = (hpet_config & Tn_INT_ROUTE_CAP_MASK) >> Tn_INT_ROUTE_CAP_SHIFT; -- cgit v1.2.3 From 5c9c9bec0589be696c70c5efb448b17d5ab720e2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 30 Jan 2008 13:30:04 +0100 Subject: x86: hibernation: document __save_processor_state() on x86 Document the fact that __save_processor_state() has to save all CPU registers referred to by the kernel in case a different kernel is used to load and restore a hibernation image containing it. Sigend-off-by: Rafael J. Wysocki Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/suspend_64.c | 20 ++++++++++++++++++++ include/asm-x86/suspend_64.h | 9 ++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/kernel/suspend_64.c index 2e5efaaf8800..569f1b540e36 100644 --- a/arch/x86/kernel/suspend_64.c +++ b/arch/x86/kernel/suspend_64.c @@ -19,6 +19,21 @@ extern const void __nosave_begin, __nosave_end; struct saved_context saved_context; +/** + * __save_processor_state - save CPU registers before creating a + * hibernation image and before restoring the memory state from it + * @ctxt - structure to store the registers contents in + * + * NOTE: If there is a CPU register the modification of which by the + * boot kernel (ie. the kernel used for loading the hibernation image) + * might affect the operations of the restored target kernel (ie. the one + * saved in the hibernation image), then its contents must be saved by this + * function. In other words, if kernel A is hibernated and different + * kernel B is used for loading the hibernation image into memory, the + * kernel A's __save_processor_state() function must save all registers + * needed by kernel A, so that it can operate correctly after the resume + * regardless of what kernel B does in the meantime. 
+ */ void __save_processor_state(struct saved_context *ctxt) { kernel_fpu_begin(); @@ -69,6 +84,11 @@ static void do_fpu_end(void) kernel_fpu_end(); } +/** + * __restore_processor_state - restore the contents of CPU registers saved + * by __save_processor_state() + * @ctxt - structure to load the registers contents from + */ void __restore_processor_state(struct saved_context *ctxt) { /* diff --git a/include/asm-x86/suspend_64.h b/include/asm-x86/suspend_64.h index c505a76bcf6e..4404668f9aa4 100644 --- a/include/asm-x86/suspend_64.h +++ b/include/asm-x86/suspend_64.h @@ -15,7 +15,14 @@ arch_prepare_suspend(void) return 0; } -/* Image of the saved processor state. If you touch this, fix acpi/wakeup.S. */ +/* + * Image of the saved processor state, used by the low level ACPI suspend to + * RAM code and by the low level hibernation code. + * + * If you modify it, fix arch/x86/kernel/acpi/wakeup_64.S and make sure that + * __save/__restore_processor_state(), defined in arch/x86/kernel/suspend_64.c, + * still work as required. + */ struct saved_context { struct pt_regs regs; u16 ds, es, fs, gs, ss; -- cgit v1.2.3 From 4c6b8b4d62fb4cb843c32db71e0a8301039908f3 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Wed, 30 Jan 2008 13:30:04 +0100 Subject: x86: fix: s2ram + P4 + tsc = annoyance s2ram recently became useful here, except for the kernel's annoying habit of disabling my P4's perfectly good TSC. [ 107.894470] CPU 1 is now offline [ 107.894474] SMP alternatives: switching to UP code [ 107.895832] CPU0 attaching sched-domain: [ 107.895836] domain 0: span 1 [ 107.895838] groups: 1 [ 107.896097] CPU1 is down [ 3.726156] Intel machine check architecture supported. [ 3.726165] Intel machine check reporting enabled on CPU#0. [ 3.726167] CPU0: Intel P4/Xeon Extended MCE MSRs (12) available [ 3.726170] CPU0: Thermal monitoring enabled [ 3.726175] Back to C! [ 3.726708] Force enabled HPET at resume [ 3.726775] Enabling non-boot CPUs ... [ 3.727049] CPU0 attaching NULL sched-domain. [ 3.727165] SMP alternatives: switching to SMP code [ 3.727858] Booting processor 1/1 eip 3000 [ 3.727862] CPU 1 irqstacks, hard=b042f000 soft=b042d000 [ 3.738173] Initializing CPU#1 [ 3.798912] Calibrating delay using timer specific routine.. 5986.12 BogoMIPS (lpj=2993061) [ 3.798920] CPU: After generic identify, caps: bfebfbff 00000000 00000000 00000000 00004400 00000000 00000000 00000000 [ 3.798931] CPU: Trace cache: 12K uops, L1 D cache: 8K [ 3.798934] CPU: L2 cache: 512K [ 3.798936] CPU: Physical Processor ID: 0 [ 3.798938] CPU: After all inits, caps: bfebfbff 00000000 00000000 0000b080 00004400 00000000 00000000 00000000 [ 3.798946] Intel machine check architecture supported. [ 3.798952] Intel machine check reporting enabled on CPU#1. [ 3.798955] CPU1: Intel P4/Xeon Extended MCE MSRs (12) available [ 3.798959] CPU1: Thermal monitoring enabled [ 3.799161] CPU1: Intel(R) Pentium(R) 4 CPU 3.00GHz stepping 09 [ 3.799187] checking TSC synchronization [CPU#0 -> CPU#1]: [ 3.819181] Measured 63588552840 cycles TSC warp between CPUs, turning off TSC clock. [ 3.819184] Marking TSC unstable due to: check_tsc_sync_source failed. If check_tsc_warp() is called after initial boot, and the TSC has in the meantime been set (BIOS, user, silicon, elves) to a value lower than the last stored/stale value, we blame the TSC. Reset to pristine condition after every test. 
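The fix is the classic "reset all static test state after every run, pass or fail" rule. A compact userspace model of the failure mode (sample values are invented; in the kernel they would be TSC reads):

#include <stdio.h>

static long long last_tsc;
static int nr_warps;

/* feed a batch of samples through the warp check, one run per "boot" */
static void check_warp(const long long *samples, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		if (last_tsc && samples[i] < last_tsc)
			nr_warps++;
		last_tsc = samples[i];
	}
}

static int run_test(const long long *samples, int n)
{
	int failed;

	check_warp(samples, n);
	failed = nr_warps != 0;

	/* reset to pristine condition after every test, pass or fail */
	nr_warps = 0;
	last_tsc = 0;
	return failed;
}

int main(void)
{
	long long boot[]   = { 100, 200, 300 };
	long long resume[] = { 10, 20, 30 };	/* TSC restarted lower */

	printf("boot:   %s\n", run_test(boot, 3)   ? "warp" : "ok");
	/*
	 * Without the reset above, last_tsc would still be 300 here and
	 * the perfectly monotonic resume samples would be blamed.
	 */
	printf("resume: %s\n", run_test(resume, 3) ? "warp" : "ok");
	return 0;
}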
Signed-off-by: Mike Galbraith Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc_sync.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 9125efe66a06..05d8f25de6ae 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -129,23 +129,23 @@ void __cpuinit check_tsc_sync_source(int cpu) while (atomic_read(&stop_count) != cpus-1) cpu_relax(); - /* - * Reset it - just in case we boot another CPU later: - */ - atomic_set(&start_count, 0); - if (nr_warps) { printk("\n"); printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs," " turning off TSC clock.\n", max_warp); mark_tsc_unstable("check_tsc_sync_source failed"); - nr_warps = 0; - max_warp = 0; - last_tsc = 0; } else { printk(" passed.\n"); } + /* + * Reset it - just in case we boot another CPU later: + */ + atomic_set(&start_count, 0); + nr_warps = 0; + max_warp = 0; + last_tsc = 0; + /* * Let the target continue with the bootup: */ -- cgit v1.2.3 From b02aae9cf52956dfe1bec73f77f81a3d05d3902b Mon Sep 17 00:00:00 2001 From: Rene Herman Date: Wed, 30 Jan 2008 13:30:05 +0100 Subject: x86: provide a DMI based port 0x80 I/O delay override. x86: provide a DMI based port 0x80 I/O delay override. Certain (HP) laptops experience trouble from our port 0x80 I/O delay writes. This patch provides for a DMI based switch to the "alternate diagnostic port" 0xed (as used by some BIOSes as well) for these. David P. Reed confirmed that port 0xed works for him and provides a proper delay. The symptoms of _not_ working are a hanging machine, with "hwclock" use being a direct trigger. Earlier versions of this attempted to simply use udelay(2), with the 2 being a value tested to be a nicely conservative upper-bound with help from many on the linux-kernel mailinglist but that approach has two problems. First, pre-loops_per_jiffy calibration (which is post PIT init while some implementations of the PIT are actually one of the historically problematic devices that need the delay) udelay() isn't particularly well-defined. We could initialise loops_per_jiffy conservatively (and based on CPU family so as to not unduly delay old machines) which would sort of work, but... Second, delaying isn't the only effect that a write to port 0x80 has. It's also a PCI posting barrier which some devices may be explicitly or implicitly relying on. Alan Cox did a survey and found evidence that additionally some drivers may be racy on SMP without the bus locking outb. Switching to an inb() makes the timing too unpredictable and as such, this DMI based switch should be the safest approach for now. Any more invasive changes should get more rigid testing first. It's moreover only very few machines with the problem and a DMI based hack seems to fit that situation. This also introduces a command-line parameter "io_delay" to override the DMI based choice again: io_delay= where "standard" means using the standard port 0x80 and "alternate" port 0xed. This retains the udelay method as a config (CONFIG_UDELAY_IO_DELAY) and command-line ("io_delay=udelay") choice for testing purposes as well. This does not change the io_delay() in the boot code which is using the same port 0x80 I/O delay but those do not appear to be a problem as David P. Reed reported the problem was already gone after using the udelay version. 
He moreover reported that booting with "acpi=off" also fixed things and seeing as how ACPI isn't touched until after this DMI based I/O port switch I believe it's safe to leave the ones in the boot code be. The DMI strings from David's HP Pavilion dv9000z are in there already and we need to get/verify the DMI info from other machines with the problem, notably the HP Pavilion dv6000z. This patch is partly based on earlier patches from Pavel Machek and David P. Reed. Signed-off-by: Rene Herman Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- Documentation/kernel-parameters.txt | 8 +++ arch/x86/Kconfig.debug | 9 +++ arch/x86/boot/compressed/misc_32.c | 8 +-- arch/x86/boot/compressed/misc_64.c | 8 +-- arch/x86/kernel/Makefile_32 | 2 +- arch/x86/kernel/Makefile_64 | 2 +- arch/x86/kernel/io_delay.c | 106 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/setup_32.c | 2 + arch/x86/kernel/setup_64.c | 2 + include/asm-x86/io_32.h | 8 ++- include/asm-x86/io_64.h | 33 +++++++---- 11 files changed, 165 insertions(+), 23 deletions(-) create mode 100644 arch/x86/kernel/io_delay.c (limited to 'arch/x86/kernel') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 880f882160e2..9e6056058425 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -794,6 +794,14 @@ and is between 256 and 4096 characters. It is defined in the file for translation below 32 bit and if not available then look in the higher range. + io_delay= [X86-32,X86-64] I/O delay method + standard + Standard port 0x80 delay + alternate + Alternate port 0xed delay + udelay + Simple two microsecond delay + io7= [HW] IO7 for Marvel based alpha systems See comment before marvel_specify_io7 in arch/alpha/kernel/core_marvel.c. diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 761ca7b5f120..40aba670fb37 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -112,4 +112,13 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. +config UDELAY_IO_DELAY + bool "Delay I/O through udelay instead of outb" + depends on DEBUG_KERNEL + help + Make inb_p/outb_p use udelay() based delays by default. Please note + that udelay() does not have the same bus-level side-effects that + the normal outb based delay does meaning this could cause drivers + to change behaviour and/or bugs to surface. 
+ endmenu diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c index b74d60d1b2fa..288e16283ef9 100644 --- a/arch/x86/boot/compressed/misc_32.c +++ b/arch/x86/boot/compressed/misc_32.c @@ -276,10 +276,10 @@ static void putstr(const char *s) RM_SCREEN_INFO.orig_y = y; pos = (x + cols * y) * 2; /* Update cursor position */ - outb_p(14, vidport); - outb_p(0xff & (pos >> 9), vidport+1); - outb_p(15, vidport); - outb_p(0xff & (pos >> 1), vidport+1); + outb(14, vidport); + outb(0xff & (pos >> 9), vidport+1); + outb(15, vidport); + outb(0xff & (pos >> 1), vidport+1); } static void* memset(void* s, int c, unsigned n) diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c index 6ea015aa65e4..43e5fcc37be9 100644 --- a/arch/x86/boot/compressed/misc_64.c +++ b/arch/x86/boot/compressed/misc_64.c @@ -269,10 +269,10 @@ static void putstr(const char *s) RM_SCREEN_INFO.orig_y = y; pos = (x + cols * y) * 2; /* Update cursor position */ - outb_p(14, vidport); - outb_p(0xff & (pos >> 9), vidport+1); - outb_p(15, vidport); - outb_p(0xff & (pos >> 1), vidport+1); + outb(14, vidport); + outb(0xff & (pos >> 9), vidport+1); + outb(15, vidport); + outb(0xff & (pos >> 1), vidport+1); } static void* memset(void* s, int c, unsigned n) diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32 index a7bc93c27662..0cc1981d1e38 100644 --- a/arch/x86/kernel/Makefile_32 +++ b/arch/x86/kernel/Makefile_32 @@ -8,7 +8,7 @@ CPPFLAGS_vmlinux.lds += -Ui386 obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \ pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\ - quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o + quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64 index 5a88890d8ee9..08a68f0d8fda 100644 --- a/arch/x86/kernel/Makefile_64 +++ b/arch/x86/kernel/Makefile_64 @@ -11,7 +11,7 @@ obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \ x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \ setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \ pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \ - i8253.o + i8253.o io_delay.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c new file mode 100644 index 000000000000..4d955e74b974 --- /dev/null +++ b/arch/x86/kernel/io_delay.c @@ -0,0 +1,106 @@ +/* + * I/O delay strategies for inb_p/outb_p + */ +#include +#include +#include +#include +#include +#include + +/* + * Allow for a DMI based override of port 0x80 needed for certain HP laptops + */ +#define IO_DELAY_PORT_STD 0x80 +#define IO_DELAY_PORT_ALT 0xed + +static void standard_io_delay(void) +{ + asm volatile ("outb %%al, %0" : : "N" (IO_DELAY_PORT_STD)); +} + +static void alternate_io_delay(void) +{ + asm volatile ("outb %%al, %0" : : "N" (IO_DELAY_PORT_ALT)); +} + +/* + * 2 usecs is an upper-bound for the outb delay but note that udelay doesn't + * have the bus-level side-effects that outb does + */ +#define IO_DELAY_USECS 2 + +/* + * High on a hill was a lonely goatherd + */ +static void udelay_io_delay(void) +{ + udelay(IO_DELAY_USECS); +} + +#ifndef CONFIG_UDELAY_IO_DELAY +static void (*io_delay)(void) = standard_io_delay; +#else +static void (*io_delay)(void) = udelay_io_delay; +#endif + 
+/* + * Paravirt wants native_io_delay to be a constant. + */ +void native_io_delay(void) +{ + io_delay(); +} +EXPORT_SYMBOL(native_io_delay); + +#ifndef CONFIG_UDELAY_IO_DELAY +static int __init dmi_alternate_io_delay_port(const struct dmi_system_id *id) +{ + printk(KERN_NOTICE "%s: using alternate I/O delay port\n", id->ident); + io_delay = alternate_io_delay; + return 0; +} + +static struct dmi_system_id __initdata alternate_io_delay_port_dmi_table[] = { + { + .callback = dmi_alternate_io_delay_port, + .ident = "HP Pavilion dv9000z", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), + DMI_MATCH(DMI_BOARD_NAME, "30B9") + } + }, + { + } +}; + +static int __initdata io_delay_override; + +void __init io_delay_init(void) +{ + if (!io_delay_override) + dmi_check_system(alternate_io_delay_port_dmi_table); +} +#endif + +static int __init io_delay_param(char *s) +{ + if (!s) + return -EINVAL; + + if (!strcmp(s, "standard")) + io_delay = standard_io_delay; + else if (!strcmp(s, "alternate")) + io_delay = alternate_io_delay; + else if (!strcmp(s, "udelay")) + io_delay = udelay_io_delay; + else + return -EINVAL; + +#ifndef CONFIG_UDELAY_IO_DELAY + io_delay_override = 1; +#endif + return 0; +} + +early_param("io_delay", io_delay_param); diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 9c24b45b513c..51bdc0b1b72e 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -648,6 +648,8 @@ void __init setup_arch(char **cmdline_p) dmi_scan_machine(); + io_delay_init();; + #ifdef CONFIG_X86_GENERICARCH generic_apic_probe(); #endif diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 30d94d1d5f5f..ec976edf0399 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -311,6 +311,8 @@ void __init setup_arch(char **cmdline_p) dmi_scan_machine(); + io_delay_init(); + #ifdef CONFIG_SMP /* setup to use the static apicid table during kernel startup */ x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init; diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h index fe881cd1e6f4..a8d25c38b91c 100644 --- a/include/asm-x86/io_32.h +++ b/include/asm-x86/io_32.h @@ -250,10 +250,14 @@ static inline void flush_write_buffers(void) #endif /* __KERNEL__ */ -static inline void native_io_delay(void) +#ifndef CONFIG_UDELAY_IO_DELAY +extern void io_delay_init(void); +#else +static inline void io_delay_init(void) { - asm volatile("outb %%al,$0x80" : : : "memory"); } +#endif +extern void native_io_delay(void); #if defined(CONFIG_PARAVIRT) #include diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h index a037b0794332..5bebaf961692 100644 --- a/include/asm-x86/io_64.h +++ b/include/asm-x86/io_64.h @@ -35,13 +35,24 @@ * - Arnaldo Carvalho de Melo */ -#define __SLOW_DOWN_IO "\noutb %%al,$0x80" +#ifndef CONFIG_UDELAY_IO_DELAY +extern void io_delay_init(void); +#else +static inline void io_delay_init(void) +{ +} +#endif +extern void native_io_delay(void); +static inline void slow_down_io(void) +{ + native_io_delay(); #ifdef REALLY_SLOW_IO -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO -#else -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO + native_io_delay(); + native_io_delay(); + native_io_delay(); #endif +} /* * Talk about misusing macros.. 
@@ -50,21 +61,21 @@ static inline void out##s(unsigned x value, unsigned short port) { #define __OUT2(s,s1,s2) \ -__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" +__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" : : "a" (value), "Nd" (port)) #define __OUT(s,s1,x) \ -__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ -__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \ +__OUT1(s,x) __OUT2(s,s1,"w"); } \ +__OUT1(s##_p,x) __OUT2(s,s1,"w"); slow_down_io(); } #define __IN1(s) \ static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; #define __IN2(s,s1,s2) \ -__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" +__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" : "=a" (_v) : "Nd" (port)) -#define __IN(s,s1,i...) \ -__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ -__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ +#define __IN(s,s1) \ +__IN1(s) __IN2(s,s1,"w"); return _v; } \ +__IN1(s##_p) __IN2(s,s1,"w"); slow_down_io(); return _v; } #define __INS(s) \ static inline void ins##s(unsigned short port, void * addr, unsigned long count) \ -- cgit v1.2.3 From 6e7c402590b75b6b45138792445ee0f0315a8473 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Jan 2008 13:30:05 +0100 Subject: x86: various changes and cleanups to in_p/out_p delay details various changes to the in_p/out_p delay details: - add the io_delay=none method - make each method selectable from the kernel config - simplify the delay code a bit by getting rid of an indirect function call - add the /proc/sys/kernel/io_delay_type sysctl - change 'io_delay=standard|alternate' to io_delay=0x80 and io_delay=0xed - make the io delay config not depend on CONFIG_DEBUG_KERNEL Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Tested-by: "David P. Reed" --- Documentation/kernel-parameters.txt | 12 ++-- arch/x86/Kconfig.debug | 79 ++++++++++++++++++++++++--- arch/x86/kernel/io_delay.c | 106 +++++++++++++++++------------------- include/asm-x86/io_32.h | 10 +--- include/asm-x86/io_64.h | 10 +--- kernel/sysctl.c | 9 +++ 6 files changed, 143 insertions(+), 83 deletions(-) (limited to 'arch/x86/kernel') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9e6056058425..b427b7c0e5d0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -795,12 +795,14 @@ and is between 256 and 4096 characters. It is defined in the file then look in the higher range. io_delay= [X86-32,X86-64] I/O delay method - standard - Standard port 0x80 delay - alternate - Alternate port 0xed delay + 0x80 + Standard port 0x80 based delay + 0xed + Alternate port 0xed based delay (needed on some systems) udelay - Simple two microsecond delay + Simple two microseconds delay + none + No delay io7= [HW] IO7 for Marvel based alpha systems See comment before marvel_specify_io7 in diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 40aba670fb37..77eda46f97b8 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -112,13 +112,78 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. 
-config UDELAY_IO_DELAY - bool "Delay I/O through udelay instead of outb" - depends on DEBUG_KERNEL +# +# IO delay types: +# + +config IO_DELAY_TYPE_0X80 + int + default "0" + +config IO_DELAY_TYPE_0XED + int + default "1" + +config IO_DELAY_TYPE_UDELAY + int + default "2" + +config IO_DELAY_TYPE_NONE + int + default "3" + +choice + prompt "IO delay type" + default IO_DELAY_0X80 + +config IO_DELAY_0X80 + bool "port 0x80 based port-IO delay [recommended]" + help + This is the traditional Linux IO delay used for in/out_p. + It is the most tested hence safest selection here. + +config IO_DELAY_0XED + bool "port 0xed based port-IO delay" + help + Use port 0xed as the IO delay. This frees up port 0x80 which is + often used as a hardware-debug port. + +config IO_DELAY_UDELAY + bool "udelay based port-IO delay" + help + Use udelay(2) as the IO delay method. This provides the delay + while not having any side-effect on the IO port space. + +config IO_DELAY_NONE + bool "no port-IO delay" help - Make inb_p/outb_p use udelay() based delays by default. Please note - that udelay() does not have the same bus-level side-effects that - the normal outb based delay does meaning this could cause drivers - to change behaviour and/or bugs to surface. + No port-IO delay. Will break on old boxes that require port-IO + delay for certain operations. Should work on most new machines. + +endchoice + +if IO_DELAY_0X80 +config DEFAULT_IO_DELAY_TYPE + int + default IO_DELAY_TYPE_0X80 +endif + +if IO_DELAY_0XED +config DEFAULT_IO_DELAY_TYPE + int + default IO_DELAY_TYPE_0XED +endif + +if IO_DELAY_UDELAY +config DEFAULT_IO_DELAY_TYPE + int + default IO_DELAY_TYPE_UDELAY +endif + +if IO_DELAY_NONE +config DEFAULT_IO_DELAY_TYPE + int + default IO_DELAY_TYPE_NONE +endif endmenu diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c index 4d955e74b974..f052e34dc94c 100644 --- a/arch/x86/kernel/io_delay.c +++ b/arch/x86/kernel/io_delay.c @@ -1,5 +1,9 @@ /* * I/O delay strategies for inb_p/outb_p + * + * Allow for a DMI based override of port 0x80, needed for certain HP laptops + * and possibly other systems. Also allow for the gradual elimination of + * outb_p/inb_p API uses. */ #include #include @@ -8,98 +12,86 @@ #include #include -/* - * Allow for a DMI based override of port 0x80 needed for certain HP laptops - */ -#define IO_DELAY_PORT_STD 0x80 -#define IO_DELAY_PORT_ALT 0xed - -static void standard_io_delay(void) -{ - asm volatile ("outb %%al, %0" : : "N" (IO_DELAY_PORT_STD)); -} - -static void alternate_io_delay(void) -{ - asm volatile ("outb %%al, %0" : : "N" (IO_DELAY_PORT_ALT)); -} - -/* - * 2 usecs is an upper-bound for the outb delay but note that udelay doesn't - * have the bus-level side-effects that outb does - */ -#define IO_DELAY_USECS 2 - -/* - * High on a hill was a lonely goatherd - */ -static void udelay_io_delay(void) -{ - udelay(IO_DELAY_USECS); -} +int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE; +EXPORT_SYMBOL_GPL(io_delay_type); -#ifndef CONFIG_UDELAY_IO_DELAY -static void (*io_delay)(void) = standard_io_delay; -#else -static void (*io_delay)(void) = udelay_io_delay; -#endif +static int __initdata io_delay_override; /* * Paravirt wants native_io_delay to be a constant. 
*/ void native_io_delay(void) { - io_delay(); + switch (io_delay_type) { + default: + case CONFIG_IO_DELAY_TYPE_0X80: + asm volatile ("outb %al, $0x80"); + break; + case CONFIG_IO_DELAY_TYPE_0XED: + asm volatile ("outb %al, $0xed"); + break; + case CONFIG_IO_DELAY_TYPE_UDELAY: + /* + * 2 usecs is an upper-bound for the outb delay but + * note that udelay doesn't have the bus-level + * side-effects that outb does, nor does udelay() have + * precise timings during very early bootup (the delays + * are shorter until calibrated): + */ + udelay(2); + case CONFIG_IO_DELAY_TYPE_NONE: + break; + } } EXPORT_SYMBOL(native_io_delay); -#ifndef CONFIG_UDELAY_IO_DELAY -static int __init dmi_alternate_io_delay_port(const struct dmi_system_id *id) +static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id) { - printk(KERN_NOTICE "%s: using alternate I/O delay port\n", id->ident); - io_delay = alternate_io_delay; + if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) { + printk(KERN_NOTICE "%s: using 0xed I/O delay port\n", + id->ident); + io_delay_type = CONFIG_IO_DELAY_TYPE_0XED; + } + return 0; } -static struct dmi_system_id __initdata alternate_io_delay_port_dmi_table[] = { +/* + * Quirk table for systems that misbehave (lock up, etc.) if port + * 0x80 is used: + */ +static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = { { - .callback = dmi_alternate_io_delay_port, + .callback = dmi_io_delay_0xed_port, .ident = "HP Pavilion dv9000z", .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), DMI_MATCH(DMI_BOARD_NAME, "30B9") } }, - { - } + { } }; -static int __initdata io_delay_override; - void __init io_delay_init(void) { if (!io_delay_override) - dmi_check_system(alternate_io_delay_port_dmi_table); + dmi_check_system(io_delay_0xed_port_dmi_table); } -#endif static int __init io_delay_param(char *s) { - if (!s) - return -EINVAL; - - if (!strcmp(s, "standard")) - io_delay = standard_io_delay; - else if (!strcmp(s, "alternate")) - io_delay = alternate_io_delay; + if (!strcmp(s, "0x80")) + io_delay_type = CONFIG_IO_DELAY_TYPE_0X80; + else if (!strcmp(s, "0xed")) + io_delay_type = CONFIG_IO_DELAY_TYPE_0XED; else if (!strcmp(s, "udelay")) - io_delay = udelay_io_delay; + io_delay_type = CONFIG_IO_DELAY_TYPE_UDELAY; + else if (!strcmp(s, "none")) + io_delay_type = CONFIG_IO_DELAY_TYPE_NONE; else return -EINVAL; -#ifndef CONFIG_UDELAY_IO_DELAY io_delay_override = 1; -#endif return 0; } diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h index a8d25c38b91c..2a04bd17eac5 100644 --- a/include/asm-x86/io_32.h +++ b/include/asm-x86/io_32.h @@ -250,15 +250,11 @@ static inline void flush_write_buffers(void) #endif /* __KERNEL__ */ -#ifndef CONFIG_UDELAY_IO_DELAY -extern void io_delay_init(void); -#else -static inline void io_delay_init(void) -{ -} -#endif extern void native_io_delay(void); +extern int io_delay_type; +extern void io_delay_init(void); + #if defined(CONFIG_PARAVIRT) #include #else diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h index 5bebaf961692..dbcc03aa1c6a 100644 --- a/include/asm-x86/io_64.h +++ b/include/asm-x86/io_64.h @@ -35,15 +35,11 @@ * - Arnaldo Carvalho de Melo */ -#ifndef CONFIG_UDELAY_IO_DELAY -extern void io_delay_init(void); -#else -static inline void io_delay_init(void) -{ -} -#endif extern void native_io_delay(void); +extern int io_delay_type; +extern void io_delay_init(void); + static inline void slow_down_io(void) { native_io_delay(); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4bc8e48434a7..357b68ba23ec 100644 --- 
a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -53,6 +53,7 @@ #ifdef CONFIG_X86 #include #include +#include #endif static int deprecated_sysctl_warning(struct __sysctl_args *args); @@ -727,6 +728,14 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "io_delay_type", + .data = &io_delay_type, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif #if defined(CONFIG_MMU) { -- cgit v1.2.3 From f9fc58910ebc448b0b7d37af1bf57a896a78e9c4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Jan 2008 13:30:05 +0100 Subject: x86: add DMI quirk for io-delay hangs on Compaq Presario V6000 laptops add the DMI strings provided by Islam Amer , for the Compaq Presario V6000 (Quanta/30B7). Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/io_delay.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c index f052e34dc94c..bd49321034db 100644 --- a/arch/x86/kernel/io_delay.c +++ b/arch/x86/kernel/io_delay.c @@ -61,6 +61,14 @@ static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id) * 0x80 is used: */ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = { + { + .callback = dmi_io_delay_0xed_port, + .ident = "Compaq Presario V6000", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), + DMI_MATCH(DMI_BOARD_NAME, "30B7") + } + }, { .callback = dmi_io_delay_0xed_port, .ident = "HP Pavilion dv9000z", @@ -69,6 +77,14 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "30B9") } }, + { + .callback = dmi_io_delay_0xed_port, + .ident = "HP Pavilion tx1000", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), + DMI_MATCH(DMI_BOARD_NAME, "30BF") + } + }, { } }; -- cgit v1.2.3 From 83bd01024b1fdfc41d9b758e5669e80fca72df66 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Jan 2008 13:30:06 +0100 Subject: x86: protect against sigaltstack wraparound cf http://lkml.org/lkml/2007/10/3/41 To summarize: on Linux, SA_ONSTACK decides whether you are already on the signal stack based on the value of the SP at the time of a signal. If you are not already inside the range, you are not "on the signal stack" and so the new signal handler frame starts over at the base of the signal stack. sigaltstack (and sigstack before it) was invented in BSD. There, the SA_ONSTACK behavior has always been different. It uses a kernel state flag to decide, rather than the SP value. When you first take an SA_ONSTACK signal and switch to the alternate signal stack, it sets the SS_ONSTACK flag in the thread's sigaltstack state in the kernel. Thereafter you are "on the signal stack" and don't switch SP before pushing a handler frame no matter what the SP value is. Only when you sigreturn from the original handler context do you clear the SS_ONSTACK flag so that a new handler frame will start over at the base of the alternate signal stack. The undesireable effect of the Linux behavior is that an overflow of the alternate signal stack can not only go undetected, but lead to a ring buffer effect of clobbering the original handler frame at the base of the signal stack for each successive signal that comes just after the overflow. This is what Shi Weihua's test case demonstrates. Normally this does not come up because of the signal mask, but the test case uses SA_NODEFER for its SIGSEGV handler. 
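The guard shown in the patch below is tiny; modeled in userspace with invented stack bounds (ss_sp/ss_size standing in for the thread's sigaltstack), the logic is:

#include <stdio.h>
#include <stdint.h>

static uintptr_t ss_sp   = 0x10000000;	/* hypothetical alt-stack base */
static size_t    ss_size = 8192;

static int on_sig_stack(uintptr_t sp)
{
	return sp > ss_sp && sp - ss_sp <= ss_size;
}

static void *get_sigframe(uintptr_t sp, size_t frame_size)
{
	/*
	 * If we are on the alternate stack and the new frame would run
	 * off its low end, return an always-bogus address: the setup
	 * then faults and the task dies with SIGSEGV instead of
	 * wrapping around over the frames at the base of the stack.
	 */
	if (on_sig_stack(sp) && !on_sig_stack(sp - frame_size))
		return (void *)-1L;
	return (void *)((sp - frame_size) & ~(uintptr_t)15);
}

int main(void)
{
	uintptr_t sp = ss_sp + 512;		/* 512 bytes of room left */

	printf("%p\n", get_sigframe(sp, 4096));	/* overflow: bogus address */
	printf("%p\n", get_sigframe(sp, 128));	/* fits: normal frame */
	return 0;
}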
The other subtle part of the existing Linux semantics is that a simple longjmp out of a signal handler serves to take you off the signal stack in a safe and reliable fashion without having used sigreturn (nor having just returned from the handler normally, which means the same). After the longjmp (or even informal stack switching not via any proper libc or kernel interface), the alternate signal stack stands ready to be used again. A paranoid program would allocate a PROT_NONE red zone around its alternate signal stack. Then a small overflow would trigger a SIGSEGV in handler setup, and be fatal (core dump) whether or not SIGSEGV is blocked. As with thread stack red zones, that cannot catch all overflows (or underflows). e.g., a local array as large as page size allocated in a function called from a handler, but not actually touched before more calls push more stack, could cause an overflow that silently pushes into some unrelated allocated pages. The BSD behavior does not do anything in particular about overflow. But it does at least avoid the wraparound or "ring buffer effect", so you'll just get a straightforward all-out overflow down your address space past the low end of the alternate signal stack. I don't know what the BSD behavior is for longjmp out of an SA_ONSTACK handler. The POSIX wording relating to sigaltstack is pretty minimal. I don't think it speaks to this issue one way or another. (The program that overflows its stack is clearly in undefined behavior territory of one sort or another anyhow.) Given the longjmp issue and the potential for highly subtle complications in existing programs relying on this in arcane ways deep in their code, I am very dubious about changing the behavior to the BSD style persistent flag. I think Shi Weihua's patches have a similar effect by tracking the SP used in the last handler setup. I think it would be sensible for the signal handler setup code to detect when it would itself be causing a stack overflow. Maybe something like the following patch (untested). This issue exists in the same way on all machines, so ideally they would all do a similar check. When it's the handler function itself or its callees that cause the overflow, rather than the signal handler frame setup alone crossing the boundary, this still won't help. But I don't see any way to distinguish that from the valid longjmp case. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/signal_32.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 20f29e4c1d33..5c6170c44b00 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -295,6 +295,13 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) /* Default to using normal stack */ esp = regs->esp; + /* + * If we are on the alternate signal stack and would overflow it, don't. + * Return an always-bogus address instead so we will die with SIGSEGV. + */ + if (on_sig_stack(esp) && !likely(on_sig_stack(esp - frame_size))) + return (void __user *) -1L; + /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa.sa_flags & SA_ONSTACK) { if (sas_ss_flags(esp) == 0) -- cgit v1.2.3 From 53d517cdbaac704352b3d0c10fecb99e0b54572e Mon Sep 17 00:00:00 2001 From: Guillaume Chazarain Date: Wed, 30 Jan 2008 13:30:06 +0100 Subject: x86: scale cyc_2_nsec according to CPU frequency scale the sched_clock() cyc_2_nsec scaling factor according to CPU frequency changes. 
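The underlying identity, unchanged by the patch, is ns = cyc * (10^6 << SC) / cpu_khz >> SC with SC = 10; what becomes per-CPU is the cached middle factor. A quick userspace check of the arithmetic, with illustrative frequencies:

#include <stdio.h>
#include <stdint.h>

#define CYC2NS_SCALE_FACTOR 10		/* 2^10, as in asm-x86/timer.h */

static uint64_t cycles_2_ns(uint64_t cyc, uint64_t scale)
{
	return (cyc * scale) >> CYC2NS_SCALE_FACTOR;
}

int main(void)
{
	uint64_t cpu_khz = 2000000;	/* 2 GHz, illustrative */
	uint64_t scale = (1000000ULL << CYC2NS_SCALE_FACTOR) / cpu_khz;

	/* 2e9 cycles at 2 GHz is one second, i.e. ~1e9 ns */
	printf("scale=%llu ns=%llu\n", (unsigned long long)scale,
	       (unsigned long long)cycles_2_ns(2000000000ULL, scale));

	/* after a cpufreq transition to 1 GHz the factor doubles */
	cpu_khz = 1000000;
	scale = (1000000ULL << CYC2NS_SCALE_FACTOR) / cpu_khz;
	printf("scale=%llu ns=%llu\n", (unsigned long long)scale,
	       (unsigned long long)cycles_2_ns(1000000000ULL, scale));
	return 0;
}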
[ mingo@elte.hu: simplified it and fixed it for SMP. ] Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc_32.c | 43 +++++++++++++++++++++++++++++++----- arch/x86/kernel/tsc_64.c | 57 +++++++++++++++++++++++++++++++++++++++--------- include/asm-x86/timer.h | 23 ++++++++++++++----- 3 files changed, 102 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 9ebc0dab66b4..00bb4c1c0593 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -80,13 +81,31 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable); * * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -unsigned long cyc2ns_scale __read_mostly; -#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ +DEFINE_PER_CPU(unsigned long, cyc2ns); -static inline void set_cyc2ns_scale(unsigned long cpu_khz) +static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) { - cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; + unsigned long flags, prev_scale, *scale; + unsigned long long tsc_now, ns_now; + + local_irq_save(flags); + sched_clock_idle_sleep_event(); + + scale = &per_cpu(cyc2ns, cpu); + + rdtscll(tsc_now); + ns_now = __cycles_2_ns(tsc_now); + + prev_scale = *scale; + if (cpu_khz) + *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; + + /* + * Start smoothly with the new frequency: + */ + sched_clock_idle_wakeup_event(0); + local_irq_restore(flags); } /* @@ -239,7 +258,9 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) ref_freq, freq->new); if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { tsc_khz = cpu_khz; - set_cyc2ns_scale(cpu_khz); + preempt_disable(); + set_cyc2ns_scale(cpu_khz, smp_processor_id()); + preempt_enable(); /* * TSC based sched_clock turns * to junk w/ cpufreq @@ -367,6 +388,8 @@ static inline void check_geode_tsc_reliable(void) { } void __init tsc_init(void) { + int cpu; + if (!cpu_has_tsc || tsc_disable) goto out_no_tsc; @@ -380,7 +403,15 @@ void __init tsc_init(void) (unsigned long)cpu_khz / 1000, (unsigned long)cpu_khz % 1000); - set_cyc2ns_scale(cpu_khz); + /* + * Secondary CPUs do not run through tsc_init(), so set up + * all the scale factors for all CPUs, assuming the same + * speed as the bootup CPU. (cpufreq notifiers will fix this + * up if their speed diverges) + */ + for_each_possible_cpu(cpu) + set_cyc2ns_scale(cpu_khz, cpu); + use_tsc_delay(); /* Check and install the TSC clocksource */ diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index 9c70af45b42b..32edd2c50e94 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -10,6 +10,7 @@ #include #include +#include static int notsc __initdata = 0; @@ -18,16 +19,48 @@ EXPORT_SYMBOL(cpu_khz); unsigned int tsc_khz; EXPORT_SYMBOL(tsc_khz); -static unsigned int cyc2ns_scale __read_mostly; +/* Accelerators for sched_clock() + * convert from cycles(64bits) => nanoseconds (64bits) + * basic equation: + * ns = cycles / (freq / ns_per_sec) + * ns = cycles * (ns_per_sec / freq) + * ns = cycles * (10^9 / (cpu_khz * 10^3)) + * ns = cycles * (10^6 / cpu_khz) + * + * Then we use scaling math (suggested by george@mvista.com) to get: + * ns = cycles * (10^6 * SC / cpu_khz) / SC + * ns = cycles * cyc2ns_scale / SC + * + * And since SC is a constant power of two, we can convert the div + * into a shift. 
+ * + * We can use khz divisor instead of mhz to keep a better precision, since + * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * (mathieu.desnoyers@polymtl.ca) + * + * -johnstul@us.ibm.com "math is hard, lets go shopping!" + */ +DEFINE_PER_CPU(unsigned long, cyc2ns); -static inline void set_cyc2ns_scale(unsigned long khz) +static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) { - cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / khz; -} + unsigned long flags, prev_scale, *scale; + unsigned long long tsc_now, ns_now; -static unsigned long long cycles_2_ns(unsigned long long cyc) -{ - return (cyc * cyc2ns_scale) >> NS_SCALE; + local_irq_save(flags); + sched_clock_idle_sleep_event(); + + scale = &per_cpu(cyc2ns, cpu); + + rdtscll(tsc_now); + ns_now = __cycles_2_ns(tsc_now); + + prev_scale = *scale; + if (cpu_khz) + *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; + + sched_clock_idle_wakeup_event(0); + local_irq_restore(flags); } unsigned long long sched_clock(void) @@ -100,7 +133,9 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, mark_tsc_unstable("cpufreq changes"); } - set_cyc2ns_scale(tsc_khz_ref); + preempt_disable(); + set_cyc2ns_scale(tsc_khz_ref, smp_processor_id()); + preempt_enable(); return 0; } @@ -151,7 +186,7 @@ static unsigned long __init tsc_read_refs(unsigned long *pm, void __init tsc_calibrate(void) { unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2; - int hpet = is_hpet_enabled(); + int hpet = is_hpet_enabled(), cpu; local_irq_save(flags); @@ -206,7 +241,9 @@ void __init tsc_calibrate(void) } tsc_khz = tsc2 / tsc1; - set_cyc2ns_scale(tsc_khz); + + for_each_possible_cpu(cpu) + set_cyc2ns_scale(tsc_khz, cpu); } /* diff --git a/include/asm-x86/timer.h b/include/asm-x86/timer.h index 0db7e994fb8b..4f6fcb050c11 100644 --- a/include/asm-x86/timer.h +++ b/include/asm-x86/timer.h @@ -2,6 +2,7 @@ #define _ASMi386_TIMER_H #include #include +#include #define TICK_SIZE (tick_nsec / 1000) @@ -16,7 +17,7 @@ extern int recalibrate_cpu_khz(void); #define calculate_cpu_khz() native_calculate_cpu_khz() #endif -/* Accellerators for sched_clock() +/* Accelerators for sched_clock() * convert from cycles(64bits) => nanoseconds (64bits) * basic equation: * ns = cycles / (freq / ns_per_sec) @@ -31,20 +32,32 @@ extern int recalibrate_cpu_khz(void); * And since SC is a constant power of two, we can convert the div * into a shift. * - * We can use khz divisor instead of mhz to keep a better percision, since + * We can use khz divisor instead of mhz to keep a better precision, since * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. * (mathieu.desnoyers@polymtl.ca) * * -johnstul@us.ibm.com "math is hard, lets go shopping!" 
*/ -extern unsigned long cyc2ns_scale __read_mostly; + +DECLARE_PER_CPU(unsigned long, cyc2ns); #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ -static inline unsigned long long cycles_2_ns(unsigned long long cyc) +static inline unsigned long long __cycles_2_ns(unsigned long long cyc) { - return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; + return cyc * per_cpu(cyc2ns, smp_processor_id()) >> CYC2NS_SCALE_FACTOR; } +static inline unsigned long long cycles_2_ns(unsigned long long cyc) +{ + unsigned long long ns; + unsigned long flags; + + local_irq_save(flags); + ns = __cycles_2_ns(cyc); + local_irq_restore(flags); + + return ns; +} #endif -- cgit v1.2.3 From 5ee613b6751cd91db4b6bd7c1dc9d2f9cf65cde2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Jan 2008 13:30:06 +0100 Subject: x86: idle wakeup event in the HLT loop do a proper idle-wakeup event on HLT as well - some CPUs stop the TSC in HLT too, not just when going through the ACPI methods. (the ACPI idle code already does this.) [ update the 64-bit side too, as noticed by Jiri Slaby. ] Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_32.c | 15 ++++++++++++--- arch/x86/kernel/process_64.c | 13 ++++++++++--- 2 files changed, 22 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 46d391d49de8..a63d2d2556ee 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -113,10 +113,19 @@ void default_idle(void) smp_mb(); local_irq_disable(); - if (!need_resched()) + if (!need_resched()) { + ktime_t t0, t1; + u64 t0n, t1n; + + t0 = ktime_get(); + t0n = ktime_to_ns(t0); safe_halt(); /* enables interrupts racelessly */ - else - local_irq_enable(); + local_irq_disable(); + t1 = ktime_get(); + t1n = ktime_to_ns(t1); + sched_clock_idle_wakeup_event(t1n - t0n); + } + local_irq_enable(); current_thread_info()->status |= TS_POLLING; } else { /* loop is done by the caller */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c2db7ef93565..40fed477f3e5 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -116,9 +116,16 @@ static void default_idle(void) smp_mb(); local_irq_disable(); if (!need_resched()) { - /* Enables interrupts one instruction before HLT. - x86 special cases this so there is no race. */ - safe_halt(); + ktime_t t0, t1; + u64 t0n, t1n; + + t0 = ktime_get(); + t0n = ktime_to_ns(t0); + safe_halt(); /* enables interrupts racelessly */ + local_irq_disable(); + t1 = ktime_get(); + t1n = ktime_to_ns(t1); + sched_clock_idle_wakeup_event(t1n - t0n); } else local_irq_enable(); current_thread_info()->status |= TS_POLLING; -- cgit v1.2.3 From 39d44a51474a52bec6d72d30ebc76f5159101d90 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 30 Jan 2008 13:30:06 +0100 Subject: x86: enable irq in default_idle on 64-bit local_irq_enable() is missing after sched_clock_idle_wakeup_event(). 
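Both changes follow one pattern: bracket the halt with timestamps, report the slept interval to the scheduler clock, and only then re-enable interrupts and fall out of the loop. A userspace stand-in, where pause() plays safe_halt() and a printf plays sched_clock_idle_wakeup_event():

#include <stdio.h>
#include <time.h>
#include <signal.h>
#include <unistd.h>

static void on_alarm(int sig) { (void)sig; }

static unsigned long long now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

int main(void)
{
	unsigned long long t0, t1;

	signal(SIGALRM, on_alarm);
	alarm(1);			/* the "interrupt" that ends HLT */

	t0 = now_ns();
	pause();			/* stand-in for safe_halt() */
	t1 = now_ns();

	/* stand-in for sched_clock_idle_wakeup_event(t1 - t0); interrupts
	 * would be re-enabled after this point, as the fix above ensures */
	printf("idle wakeup event: slept %llu ns\n", t1 - t0);
	return 0;
}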
Signed-off-by: Hiroshi Shimamoto Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 40fed477f3e5..2c9e59448f4c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -126,8 +126,8 @@ static void default_idle(void) t1 = ktime_get(); t1n = ktime_to_ns(t1); sched_clock_idle_wakeup_event(t1n - t0n); - } else - local_irq_enable(); + } + local_irq_enable(); current_thread_info()->status |= TS_POLLING; } -- cgit v1.2.3 From c140df973c07ac328aafd19d4f4c413f2f8902df Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Jan 2008 13:30:09 +0100 Subject: x86: clean up arch/x86/kernel/aperture_64.c whitespace cleanup. No code changed: text data bss dec hex filename 2080 76 4 2160 870 aperture_64.o.before 2080 76 4 2160 870 aperture_64.o.after errors lines of code errors/KLOC arch/x86/kernel/aperture_64.c 114 299 381.2 arch/x86/kernel/aperture_64.c 0 315 0 Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/aperture_64.c | 244 ++++++++++++++++++++++-------------------- 1 file changed, 130 insertions(+), 114 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 5b6992799c9d..250db0527f5d 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -1,12 +1,12 @@ -/* +/* * Firmware replacement code. - * + * * Work around broken BIOSes that don't set an aperture or only set the - * aperture in the AGP bridge. - * If all fails map the aperture over some low memory. This is cheaper than - * doing bounce buffering. The memory is lost. This is done at early boot - * because only the bootmem allocator can allocate 32+MB. - * + * aperture in the AGP bridge. + * If all fails map the aperture over some low memory. This is cheaper than + * doing bounce buffering. The memory is lost. This is done at early boot + * because only the bootmem allocator can allocate 32+MB. + * * Copyright 2002 Andi Kleen, SuSE Labs. */ #include @@ -30,7 +30,7 @@ int gart_iommu_aperture_disabled __initdata = 0; int gart_iommu_aperture_allowed __initdata = 0; int fallback_aper_order __initdata = 1; /* 64MB */ -int fallback_aper_force __initdata = 0; +int fallback_aper_force __initdata = 0; int fix_aperture __initdata = 1; @@ -49,20 +49,20 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size) /* This code runs before the PCI subsystem is initialized, so just access the northbridge directly. */ -static u32 __init allocate_aperture(void) +static u32 __init allocate_aperture(void) { u32 aper_size; - void *p; + void *p; - if (fallback_aper_order > 7) - fallback_aper_order = 7; - aper_size = (32 * 1024 * 1024) << fallback_aper_order; + if (fallback_aper_order > 7) + fallback_aper_order = 7; + aper_size = (32 * 1024 * 1024) << fallback_aper_order; - /* - * Aperture has to be naturally aligned. This means an 2GB aperture won't - * have much chance of finding a place in the lower 4GB of memory. - * Unfortunately we cannot move it up because that would make the - * IOMMU useless. + /* + * Aperture has to be naturally aligned. This means a 2GB aperture + * won't have much chance of finding a place in the lower 4GB of + * memory. Unfortunately we cannot move it up because that would + * make the IOMMU useless. 
*/ p = __alloc_bootmem_nopanic(aper_size, aper_size, 0); if (!p || __pa(p)+aper_size > 0xffffffff) { @@ -73,54 +73,60 @@ static u32 __init allocate_aperture(void) return 0; } printk("Mapping aperture over %d KB of RAM @ %lx\n", - aper_size >> 10, __pa(p)); + aper_size >> 10, __pa(p)); insert_aperture_resource((u32)__pa(p), aper_size); - return (u32)__pa(p); + + return (u32)__pa(p); } static int __init aperture_valid(u64 aper_base, u32 aper_size) -{ - if (!aper_base) +{ + if (!aper_base) return 0; - if (aper_size < 64*1024*1024) { + if (aper_size < 64*1024*1024) { printk("Aperture too small (%d MB)\n", aper_size>>20); return 0; } if (aper_base + aper_size > 0x100000000UL) { printk("Aperture beyond 4GB. Ignoring.\n"); - return 0; + return 0; } if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { printk("Aperture pointing to e820 RAM. Ignoring.\n"); - return 0; - } + return 0; + } return 1; -} +} /* Find a PCI capability */ -static __u32 __init find_cap(int num, int slot, int func, int cap) -{ - u8 pos; +static __u32 __init find_cap(int num, int slot, int func, int cap) +{ int bytes; - if (!(read_pci_config_16(num,slot,func,PCI_STATUS) & PCI_STATUS_CAP_LIST)) + u8 pos; + + if (!(read_pci_config_16(num, slot, func, PCI_STATUS) & + PCI_STATUS_CAP_LIST)) return 0; - pos = read_pci_config_byte(num,slot,func,PCI_CAPABILITY_LIST); - for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { + + pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST); + for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { u8 id; - pos &= ~3; - id = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_ID); + + pos &= ~3; + id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID); if (id == 0xff) break; - if (id == cap) - return pos; - pos = read_pci_config_byte(num,slot,func,pos+PCI_CAP_LIST_NEXT); - } + if (id == cap) + return pos; + pos = read_pci_config_byte(num, slot, func, + pos+PCI_CAP_LIST_NEXT); + } return 0; -} +} /* Read a standard AGPv3 bridge header */ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) -{ +{ u32 apsize; u32 apsizereg; int nbits; @@ -128,7 +134,7 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) u64 aper; printk("AGP bridge at %02x:%02x:%02x\n", num, slot, func); - apsizereg = read_pci_config_16(num,slot,func, cap + 0x14); + apsizereg = read_pci_config_16(num, slot, func, cap + 0x14); if (apsizereg == 0xffffffff) { printk("APSIZE in AGP bridge unreadable\n"); return 0; @@ -136,80 +142,84 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) apsize = apsizereg & 0xfff; /* Some BIOS use weird encodings not in the AGPv3 table. */ - if (apsize & 0xff) - apsize |= 0xf00; + if (apsize & 0xff) + apsize |= 0xf00; nbits = hweight16(apsize); *order = 7 - nbits; if ((int)*order < 0) /* < 32MB */ *order = 0; - - aper_low = read_pci_config(num,slot,func, 0x10); - aper_hi = read_pci_config(num,slot,func,0x14); + + aper_low = read_pci_config(num, slot, func, 0x10); + aper_hi = read_pci_config(num, slot, func, 0x14); aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); - printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", + printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", aper, 32 << *order, apsizereg); if (!aperture_valid(aper, (32*1024*1024) << *order)) - return 0; - return (u32)aper; -} - -/* Look for an AGP bridge. Windows only expects the aperture in the - AGP bridge and some BIOS forget to initialize the Northbridge too. - Work around this here. 
- - Do an PCI bus scan by hand because we're running before the PCI - subsystem. + return 0; + return (u32)aper; +} - All K8 AGP bridges are AGPv3 compliant, so we can do this scan - generically. It's probably overkill to always scan all slots because - the AGP bridges should be always an own bus on the HT hierarchy, - but do it here for future safety. */ +/* + * Look for an AGP bridge. Windows only expects the aperture in the + * AGP bridge and some BIOS forget to initialize the Northbridge too. + * Work around this here. + * + * Do an PCI bus scan by hand because we're running before the PCI + * subsystem. + * + * All K8 AGP bridges are AGPv3 compliant, so we can do this scan + * generically. It's probably overkill to always scan all slots because + * the AGP bridges should be always an own bus on the HT hierarchy, + * but do it here for future safety. + */ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) { int num, slot, func; /* Poor man's PCI discovery */ - for (num = 0; num < 256; num++) { - for (slot = 0; slot < 32; slot++) { - for (func = 0; func < 8; func++) { + for (num = 0; num < 256; num++) { + for (slot = 0; slot < 32; slot++) { + for (func = 0; func < 8; func++) { u32 class, cap; u8 type; - class = read_pci_config(num,slot,func, + class = read_pci_config(num, slot, func, PCI_CLASS_REVISION); if (class == 0xffffffff) - break; - - switch (class >> 16) { + break; + + switch (class >> 16) { case PCI_CLASS_BRIDGE_HOST: case PCI_CLASS_BRIDGE_OTHER: /* needed? */ /* AGP bridge? */ - cap = find_cap(num,slot,func,PCI_CAP_ID_AGP); + cap = find_cap(num, slot, func, + PCI_CAP_ID_AGP); if (!cap) break; - *valid_agp = 1; - return read_agp(num,slot,func,cap,order); - } - + *valid_agp = 1; + return read_agp(num, slot, func, cap, + order); + } + /* No multi-function device? 
*/ - type = read_pci_config_byte(num,slot,func, + type = read_pci_config_byte(num, slot, func, PCI_HEADER_TYPE); if (!(type & 0x80)) break; - } - } + } + } } - printk("No AGP bridge found\n"); + printk("No AGP bridge found\n"); + return 0; } void __init gart_iommu_hole_init(void) -{ - int fix, num; +{ u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; u64 aper_base, last_aper_base = 0; - int valid_agp = 0; + int fix, num, valid_agp = 0; if (gart_iommu_aperture_disabled || !fix_aperture || !early_pci_allowed()) @@ -218,24 +228,24 @@ void __init gart_iommu_hole_init(void) printk(KERN_INFO "Checking aperture...\n"); fix = 0; - for (num = 24; num < 32; num++) { + for (num = 24; num < 32; num++) { if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) continue; iommu_detected = 1; gart_iommu_aperture = 1; - aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; - aper_size = (32 * 1024 * 1024) << aper_order; + aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; + aper_size = (32 * 1024 * 1024) << aper_order; aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff; - aper_base <<= 25; + aper_base <<= 25; - printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, + printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, aper_base, aper_size>>20); - + if (!aperture_valid(aper_base, aper_size)) { - fix = 1; - break; + fix = 1; + break; } if ((last_aper_order && aper_order != last_aper_order) || @@ -245,27 +255,28 @@ void __init gart_iommu_hole_init(void) } last_aper_order = aper_order; last_aper_base = aper_base; - } + } if (!fix && !fallback_aper_force) { if (last_aper_base) { unsigned long n = (32 * 1024 * 1024) << last_aper_order; + insert_aperture_resource((u32)last_aper_base, n); } - return; + return; } if (!fallback_aper_force) - aper_alloc = search_agp_bridge(&aper_order, &valid_agp); - - if (aper_alloc) { + aper_alloc = search_agp_bridge(&aper_order, &valid_agp); + + if (aper_alloc) { /* Got the aperture from the AGP bridge */ } else if (swiotlb && !valid_agp) { /* Do nothing */ } else if ((!no_iommu && end_pfn > MAX_DMA32_PFN) || force_iommu || valid_agp || - fallback_aper_force) { + fallback_aper_force) { printk("Your BIOS doesn't leave a aperture memory hole\n"); printk("Please enable the IOMMU option in the BIOS setup\n"); printk("This costs you %d MB of RAM\n", @@ -273,27 +284,32 @@ void __init gart_iommu_hole_init(void) aper_order = fallback_aper_order; aper_alloc = allocate_aperture(); - if (!aper_alloc) { - /* Could disable AGP and IOMMU here, but it's probably - not worth it. But the later users cannot deal with - bad apertures and turning on the aperture over memory - causes very strange problems, so it's better to - panic early. */ + if (!aper_alloc) { + /* + * Could disable AGP and IOMMU here, but it's + * probably not worth it. But the later users + * cannot deal with bad apertures and turning + * on the aperture over memory causes very + * strange problems, so it's better to panic + * early. + */ panic("Not enough memory for aperture"); } - } else { - return; - } + } else { + return; + } /* Fix up the north bridges */ - for (num = 24; num < 32; num++) { + for (num = 24; num < 32; num++) { if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) - continue; - - /* Don't enable translation yet. That is done later. - Assume this BIOS didn't initialise the GART so - just overwrite all previous bits */ - write_pci_config(0, num, 3, 0x90, aper_order<<1); - write_pci_config(0, num, 3, 0x94, aper_alloc>>25); - } -} + continue; + + /* + * Don't enable translation yet. 
That is done later. + * Assume this BIOS didn't initialise the GART so + * just overwrite all previous bits + */ + write_pci_config(0, num, 3, 0x90, aper_order<<1); + write_pci_config(0, num, 3, 0x94, aper_alloc>>25); + } +} -- cgit v1.2.3 From 31183ba8fd05b6ddc67ab4d726167cbc52e1b346 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Jan 2008 13:30:10 +0100 Subject: x86: clean up arch/x86/kernel/aperture_64.c printk()s clean up arch/x86/kernel/aperture_64.c printk()s. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/aperture_64.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 250db0527f5d..52d2beac4556 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -66,14 +66,15 @@ static u32 __init allocate_aperture(void) */ p = __alloc_bootmem_nopanic(aper_size, aper_size, 0); if (!p || __pa(p)+aper_size > 0xffffffff) { - printk("Cannot allocate aperture memory hole (%p,%uK)\n", - p, aper_size>>10); + printk(KERN_ERR + "Cannot allocate aperture memory hole (%p,%uK)\n", + p, aper_size>>10); if (p) free_bootmem(__pa(p), aper_size); return 0; } - printk("Mapping aperture over %d KB of RAM @ %lx\n", - aper_size >> 10, __pa(p)); + printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", + aper_size >> 10, __pa(p)); insert_aperture_resource((u32)__pa(p), aper_size); return (u32)__pa(p); @@ -83,18 +84,20 @@ static int __init aperture_valid(u64 aper_base, u32 aper_size) { if (!aper_base) return 0; + if (aper_size < 64*1024*1024) { - printk("Aperture too small (%d MB)\n", aper_size>>20); + printk(KERN_ERR "Aperture too small (%d MB)\n", aper_size>>20); return 0; } if (aper_base + aper_size > 0x100000000UL) { - printk("Aperture beyond 4GB. Ignoring.\n"); + printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n"); return 0; } if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { - printk("Aperture pointing to e820 RAM. Ignoring.\n"); + printk(KERN_ERR "Aperture pointing to e820 RAM. 
Ignoring.\n"); return 0; } + return 1; } @@ -133,10 +136,10 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) u32 aper_low, aper_hi; u64 aper; - printk("AGP bridge at %02x:%02x:%02x\n", num, slot, func); + printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", num, slot, func); apsizereg = read_pci_config_16(num, slot, func, cap + 0x14); if (apsizereg == 0xffffffff) { - printk("APSIZE in AGP bridge unreadable\n"); + printk(KERN_ERR "APSIZE in AGP bridge unreadable\n"); return 0; } @@ -153,8 +156,8 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) aper_hi = read_pci_config(num, slot, func, 0x14); aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); - printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", - aper, 32 << *order, apsizereg); + printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", + aper, 32 << *order, apsizereg); if (!aperture_valid(aper, (32*1024*1024) << *order)) return 0; @@ -210,7 +213,7 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) } } } - printk("No AGP bridge found\n"); + printk(KERN_INFO "No AGP bridge found\n"); return 0; } @@ -240,8 +243,8 @@ void __init gart_iommu_hole_init(void) aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff; aper_base <<= 25; - printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, - aper_base, aper_size>>20); + printk(KERN_INFO "CPU %d: aperture @ %Lx size %u MB\n", + num-24, aper_base, aper_size>>20); if (!aperture_valid(aper_base, aper_size)) { fix = 1; @@ -277,10 +280,13 @@ void __init gart_iommu_hole_init(void) force_iommu || valid_agp || fallback_aper_force) { - printk("Your BIOS doesn't leave a aperture memory hole\n"); - printk("Please enable the IOMMU option in the BIOS setup\n"); - printk("This costs you %d MB of RAM\n", - 32 << fallback_aper_order); + printk(KERN_ERR + "Your BIOS doesn't leave a aperture memory hole\n"); + printk(KERN_ERR + "Please enable the IOMMU option in the BIOS setup\n"); + printk(KERN_ERR + "This costs you %d MB of RAM\n", + 32 << fallback_aper_order); aper_order = fallback_aper_order; aper_alloc = allocate_aperture(); -- cgit v1.2.3 From 05fccb0e3840248324a96b320562210410be73dc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Jan 2008 13:30:12 +0100 Subject: x86: code cleanups in arch/x86/kernel/pci-gart_64.c code cleanups: errors lines of code errors/KLOC arch/x86/kernel/pci-gart_64.c 183 748 244.6 arch/x86/kernel/pci-gart_64.c 0 790 0 Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-gart_64.c | 508 +++++++++++++++++++++++------------------- 1 file changed, 276 insertions(+), 232 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 06bcba536045..d2b46b489412 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -1,12 +1,12 @@ /* * Dynamic DMA mapping support for AMD Hammer. - * + * * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI. * This allows to use PCI devices that only support 32bit addresses on systems - * with more than 4GB. + * with more than 4GB. * * See Documentation/DMA-mapping.txt for the interface specification. - * + * * Copyright 2002 Andi Kleen, SuSE Labs. * Subject to the GNU General Public License v2 only. 
*/ @@ -37,23 +37,26 @@ #include static unsigned long iommu_bus_base; /* GART remapping area (physical) */ -static unsigned long iommu_size; /* size of remapping area bytes */ +static unsigned long iommu_size; /* size of remapping area bytes */ static unsigned long iommu_pages; /* .. and in pages */ -static u32 *iommu_gatt_base; /* Remapping table */ +static u32 *iommu_gatt_base; /* Remapping table */ -/* If this is disabled the IOMMU will use an optimized flushing strategy - of only flushing when an mapping is reused. With it true the GART is flushed - for every mapping. Problem is that doing the lazy flush seems to trigger - bugs with some popular PCI cards, in particular 3ware (but has been also - also seen with Qlogic at least). */ +/* + * If this is disabled the IOMMU will use an optimized flushing strategy + * of only flushing when an mapping is reused. With it true the GART is + * flushed for every mapping. Problem is that doing the lazy flush seems + * to trigger bugs with some popular PCI cards, in particular 3ware (but + * has been also also seen with Qlogic at least). + */ int iommu_fullflush = 1; -/* Allocation bitmap for the remapping area */ +/* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); -static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ +/* Guarded by iommu_bitmap_lock: */ +static unsigned long *iommu_gart_bitmap; -static u32 gart_unmapped_entry; +static u32 gart_unmapped_entry; #define GPTE_VALID 1 #define GPTE_COHERENT 2 @@ -61,10 +64,10 @@ static u32 gart_unmapped_entry; (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) -#define to_pages(addr,size) \ +#define to_pages(addr, size) \ (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) -#define EMERGENCY_PAGES 32 /* = 128KB */ +#define EMERGENCY_PAGES 32 /* = 128KB */ #ifdef CONFIG_AGP #define AGPEXTERN extern @@ -77,130 +80,152 @@ AGPEXTERN int agp_memory_reserved; AGPEXTERN __u32 *agp_gatt_table; static unsigned long next_bit; /* protected by iommu_bitmap_lock */ -static int need_flush; /* global flush state. set for each gart wrap */ +static int need_flush; /* global flush state. 
set for each gart wrap */ -static unsigned long alloc_iommu(int size) -{ +static unsigned long alloc_iommu(int size) +{ unsigned long offset, flags; - spin_lock_irqsave(&iommu_bitmap_lock, flags); - offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size); + spin_lock_irqsave(&iommu_bitmap_lock, flags); + offset = find_next_zero_string(iommu_gart_bitmap, next_bit, + iommu_pages, size); if (offset == -1) { need_flush = 1; - offset = find_next_zero_string(iommu_gart_bitmap,0,iommu_pages,size); + offset = find_next_zero_string(iommu_gart_bitmap, 0, + iommu_pages, size); } - if (offset != -1) { - set_bit_string(iommu_gart_bitmap, offset, size); - next_bit = offset+size; - if (next_bit >= iommu_pages) { + if (offset != -1) { + set_bit_string(iommu_gart_bitmap, offset, size); + next_bit = offset+size; + if (next_bit >= iommu_pages) { next_bit = 0; need_flush = 1; - } - } + } + } if (iommu_fullflush) need_flush = 1; - spin_unlock_irqrestore(&iommu_bitmap_lock, flags); + spin_unlock_irqrestore(&iommu_bitmap_lock, flags); + return offset; -} +} static void free_iommu(unsigned long offset, int size) -{ +{ unsigned long flags; + spin_lock_irqsave(&iommu_bitmap_lock, flags); __clear_bit_string(iommu_gart_bitmap, offset, size); spin_unlock_irqrestore(&iommu_bitmap_lock, flags); -} +} -/* +/* * Use global flush state to avoid races with multiple flushers. */ static void flush_gart(void) -{ +{ unsigned long flags; + spin_lock_irqsave(&iommu_bitmap_lock, flags); if (need_flush) { k8_flush_garts(); need_flush = 0; - } + } spin_unlock_irqrestore(&iommu_bitmap_lock, flags); -} +} #ifdef CONFIG_IOMMU_LEAK -#define SET_LEAK(x) if (iommu_leak_tab) \ - iommu_leak_tab[x] = __builtin_return_address(0); -#define CLEAR_LEAK(x) if (iommu_leak_tab) \ - iommu_leak_tab[x] = NULL; +#define SET_LEAK(x) \ + do { \ + if (iommu_leak_tab) \ + iommu_leak_tab[x] = __builtin_return_address(0);\ + } while (0) + +#define CLEAR_LEAK(x) \ + do { \ + if (iommu_leak_tab) \ + iommu_leak_tab[x] = NULL; \ + } while (0) /* Debugging aid for drivers that don't free their IOMMU tables */ -static void **iommu_leak_tab; +static void **iommu_leak_tab; static int leak_trace; static int iommu_leak_pages = 20; + static void dump_leak(void) { int i; - static int dump; - if (dump || !iommu_leak_tab) return; + static int dump; + + if (dump || !iommu_leak_tab) + return; dump = 1; - show_stack(NULL,NULL); - /* Very crude. dump some from the end of the table too */ - printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages); - for (i = 0; i < iommu_leak_pages; i+=2) { - printk("%lu: ", iommu_pages-i); + show_stack(NULL, NULL); + + /* Very crude. dump some from the end of the table too */ + printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n", + iommu_leak_pages); + for (i = 0; i < iommu_leak_pages; i += 2) { + printk(KERN_DEBUG "%lu: ", iommu_pages-i); printk_address((unsigned long) iommu_leak_tab[iommu_pages-i]); - printk("%c", (i+1)%2 == 0 ? '\n' : ' '); - } - printk("\n"); + printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); + } + printk(KERN_DEBUG "\n"); } #else -#define SET_LEAK(x) -#define CLEAR_LEAK(x) +# define SET_LEAK(x) +# define CLEAR_LEAK(x) #endif static void iommu_full(struct device *dev, size_t size, int dir) { - /* + /* * Ran out of IOMMU space for this operation. This is very bad. * Unfortunately the drivers cannot handle this operation properly. 
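 * (On overflow dma_map_area() returns bad_dma_address, i.e. the start
 * of the EMERGENCY_PAGES region reserved at the bottom of the aperture.)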
- * Return some non mapped prereserved space in the aperture and + * Return some non mapped prereserved space in the aperture and * let the Northbridge deal with it. This will result in garbage * in the IO operation. When the size exceeds the prereserved space - * memory corruption will occur or random memory will be DMAed + * memory corruption will occur or random memory will be DMAed * out. Hopefully no network devices use single mappings that big. - */ - - printk(KERN_ERR - "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", - size, dev->bus_id); + */ + + printk(KERN_ERR + "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", + size, dev->bus_id); if (size > PAGE_SIZE*EMERGENCY_PAGES) { if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) panic("PCI-DMA: Memory would be corrupted\n"); - if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) - panic(KERN_ERR "PCI-DMA: Random memory would be DMAed\n"); - } - + if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) + panic(KERN_ERR + "PCI-DMA: Random memory would be DMAed\n"); + } #ifdef CONFIG_IOMMU_LEAK - dump_leak(); + dump_leak(); #endif -} +} -static inline int need_iommu(struct device *dev, unsigned long addr, size_t size) -{ +static inline int +need_iommu(struct device *dev, unsigned long addr, size_t size) +{ u64 mask = *dev->dma_mask; int high = addr + size > mask; int mmu = high; - if (force_iommu) - mmu = 1; - return mmu; + + if (force_iommu) + mmu = 1; + + return mmu; } -static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size) -{ +static inline int +nonforced_iommu(struct device *dev, unsigned long addr, size_t size) +{ u64 mask = *dev->dma_mask; int high = addr + size > mask; int mmu = high; - return mmu; + + return mmu; } /* Map a single continuous physical area into the IOMMU. @@ -208,13 +233,14 @@ static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t */ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, size_t size, int dir) -{ +{ unsigned long npages = to_pages(phys_mem, size); unsigned long iommu_page = alloc_iommu(npages); int i; + if (iommu_page == -1) { if (!nonforced_iommu(dev, phys_mem, size)) - return phys_mem; + return phys_mem; if (panic_on_overflow) panic("dma_map_area overflow %lu bytes\n", size); iommu_full(dev, size, dir); @@ -229,35 +255,39 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); } -static dma_addr_t gart_map_simple(struct device *dev, char *buf, - size_t size, int dir) +static dma_addr_t +gart_map_simple(struct device *dev, char *buf, size_t size, int dir) { dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); + flush_gart(); + return map; } /* Map a single area into the IOMMU */ -static dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir) +static dma_addr_t +gart_map_single(struct device *dev, void *addr, size_t size, int dir) { unsigned long phys_mem, bus; if (!dev) dev = &fallback_dev; - phys_mem = virt_to_phys(addr); + phys_mem = virt_to_phys(addr); if (!need_iommu(dev, phys_mem, size)) - return phys_mem; + return phys_mem; bus = gart_map_simple(dev, addr, size, dir); - return bus; + + return bus; } /* * Free a DMA mapping. 
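 * (The bus address is converted back to a GART page index, every covered
 * GATT entry is pointed at the scratch page again, and the bitmap range
 * is handed back via free_iommu().)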
*/ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, - size_t size, int direction) + size_t size, int direction) { unsigned long iommu_page; int npages; @@ -266,6 +296,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || dma_addr >= iommu_bus_base + iommu_size) return; + iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; npages = to_pages(dma_addr, size); for (i = 0; i < npages; i++) { @@ -278,7 +309,8 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, /* * Wrapper for pci_unmap_single working with scatterlists. */ -static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) +static void +gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) { struct scatterlist *s; int i; @@ -303,12 +335,13 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, for_each_sg(sg, s, nents, i) { unsigned long addr = sg_phys(s); - if (nonforced_iommu(dev, addr, s->length)) { + + if (nonforced_iommu(dev, addr, s->length)) { addr = dma_map_area(dev, addr, s->length, dir); - if (addr == bad_dma_address) { - if (i > 0) + if (addr == bad_dma_address) { + if (i > 0) gart_unmap_sg(dev, sg, i, dir); - nents = 0; + nents = 0; sg[0].dma_length = 0; break; } @@ -317,15 +350,16 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, s->dma_length = s->length; } flush_gart(); + return nents; } /* Map multiple scatterlist entries continuous into the first. */ static int __dma_map_cont(struct scatterlist *start, int nelems, - struct scatterlist *sout, unsigned long pages) + struct scatterlist *sout, unsigned long pages) { unsigned long iommu_start = alloc_iommu(pages); - unsigned long iommu_page = iommu_start; + unsigned long iommu_page = iommu_start; struct scatterlist *s; int i; @@ -335,32 +369,33 @@ static int __dma_map_cont(struct scatterlist *start, int nelems, for_each_sg(start, s, nelems, i) { unsigned long pages, addr; unsigned long phys_addr = s->dma_address; - + BUG_ON(s != start && s->offset); if (s == start) { sout->dma_address = iommu_bus_base; sout->dma_address += iommu_page*PAGE_SIZE + s->offset; sout->dma_length = s->length; - } else { - sout->dma_length += s->length; + } else { + sout->dma_length += s->length; } addr = phys_addr; - pages = to_pages(s->offset, s->length); - while (pages--) { - iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); + pages = to_pages(s->offset, s->length); + while (pages--) { + iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); SET_LEAK(iommu_page); addr += PAGE_SIZE; iommu_page++; } - } - BUG_ON(iommu_page - iommu_start != pages); + } + BUG_ON(iommu_page - iommu_start != pages); + return 0; } -static inline int dma_map_cont(struct scatterlist *start, int nelems, - struct scatterlist *sout, - unsigned long pages, int need) +static inline int +dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout, + unsigned long pages, int need) { if (!need) { BUG_ON(nelems != 1); @@ -370,22 +405,19 @@ static inline int dma_map_cont(struct scatterlist *start, int nelems, } return __dma_map_cont(start, nelems, sout, pages); } - + /* * DMA map all entries in a scatterlist. - * Merge chunks that have page aligned sizes into a continuous mapping. + * Merge chunks that have page aligned sizes into a continuous mapping. 
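 * (A run of entries is merged only while each chunk ends on a page
 * boundary and the next one has no offset; once that breaks, or when
 * iommu_merge is off, the accumulated run is flushed through
 * dma_map_cont().)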
*/ -static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, - int dir) +static int +gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) { - int i; - int out; - int start; - unsigned long pages = 0; - int need = 0, nextneed; struct scatterlist *s, *ps, *start_sg, *sgmap; + int need = 0, nextneed, i, out, start; + unsigned long pages = 0; - if (nents == 0) + if (nents == 0) return 0; if (!dev) @@ -397,15 +429,19 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, ps = NULL; /* shut up gcc */ for_each_sg(sg, s, nents, i) { dma_addr_t addr = sg_phys(s); + s->dma_address = addr; - BUG_ON(s->length == 0); + BUG_ON(s->length == 0); - nextneed = need_iommu(dev, addr, s->length); + nextneed = need_iommu(dev, addr, s->length); /* Handle the previous not yet processed entries */ if (i > start) { - /* Can only merge when the last chunk ends on a page - boundary and the new one doesn't have an offset. */ + /* + * Can only merge when the last chunk ends on a + * page boundary and the new one doesn't have an + * offset. + */ if (!iommu_merge || !nextneed || !need || s->offset || (ps->offset + ps->length) % PAGE_SIZE) { if (dma_map_cont(start_sg, i - start, sgmap, @@ -436,6 +472,7 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, error: flush_gart(); gart_unmap_sg(dev, sg, out, dir); + /* When it was forced or merged try again in a dumb way */ if (force_iommu || iommu_merge) { out = dma_map_sg_nonforce(dev, sg, nents, dir); @@ -444,64 +481,68 @@ error: } if (panic_on_overflow) panic("dma_map_sg: overflow on %lu pages\n", pages); + iommu_full(dev, pages << PAGE_SHIFT, dir); for_each_sg(sg, s, nents, i) s->dma_address = bad_dma_address; return 0; -} +} static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) -{ - unsigned long a; - if (!iommu_size) { - iommu_size = aper_size; - if (!no_agp) - iommu_size /= 2; - } - - a = aper + iommu_size; +{ + unsigned long a; + + if (!iommu_size) { + iommu_size = aper_size; + if (!no_agp) + iommu_size /= 2; + } + + a = aper + iommu_size; iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a; - if (iommu_size < 64*1024*1024) + if (iommu_size < 64*1024*1024) { printk(KERN_WARNING - "PCI-DMA: Warning: Small IOMMU %luMB. Consider increasing the AGP aperture in BIOS\n",iommu_size>>20); - + "PCI-DMA: Warning: Small IOMMU %luMB." + " Consider increasing the AGP aperture in BIOS\n", + iommu_size >> 20); + } + return iommu_size; -} +} -static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) -{ - unsigned aper_size = 0, aper_base_32; +static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) +{ + unsigned aper_size = 0, aper_base_32, aper_order; u64 aper_base; - unsigned aper_order; - pci_read_config_dword(dev, 0x94, &aper_base_32); + pci_read_config_dword(dev, 0x94, &aper_base_32); pci_read_config_dword(dev, 0x90, &aper_order); - aper_order = (aper_order >> 1) & 7; + aper_order = (aper_order >> 1) & 7; - aper_base = aper_base_32 & 0x7fff; + aper_base = aper_base_32 & 0x7fff; aper_base <<= 25; - aper_size = (32 * 1024 * 1024) << aper_order; - if (aper_base + aper_size > 0x100000000UL || !aper_size) + aper_size = (32 * 1024 * 1024) << aper_order; + if (aper_base + aper_size > 0x100000000UL || !aper_size) aper_base = 0; *size = aper_size; return aper_base; -} +} -/* +/* * Private Northbridge GATT initialization in case we cannot use the - * AGP driver for some reason. + * AGP driver for some reason. 
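 * (The GATT gets one u32 per aperture page, is remapped with
 * PAGE_KERNEL_NOCACHE, and its physical address is programmed into
 * config register 0x98 of every K8 northbridge.)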
*/ static __init int init_k8_gatt(struct agp_kern_info *info) -{ +{ + unsigned aper_size, gatt_size, new_aper_size; + unsigned aper_base, new_aper_base; struct pci_dev *dev; void *gatt; - unsigned aper_base, new_aper_base; - unsigned aper_size, gatt_size, new_aper_size; int i; printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); @@ -509,75 +550,77 @@ static __init int init_k8_gatt(struct agp_kern_info *info) dev = NULL; for (i = 0; i < num_k8_northbridges; i++) { dev = k8_northbridges[i]; - new_aper_base = read_aperture(dev, &new_aper_size); - if (!new_aper_base) - goto nommu; - - if (!aper_base) { + new_aper_base = read_aperture(dev, &new_aper_size); + if (!new_aper_base) + goto nommu; + + if (!aper_base) { aper_size = new_aper_size; aper_base = new_aper_base; - } - if (aper_size != new_aper_size || aper_base != new_aper_base) + } + if (aper_size != new_aper_size || aper_base != new_aper_base) goto nommu; } if (!aper_base) - goto nommu; + goto nommu; info->aper_base = aper_base; - info->aper_size = aper_size>>20; + info->aper_size = aper_size >> 20; - gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); - gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); - if (!gatt) + gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); + gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); + if (!gatt) panic("Cannot allocate GATT table"); - if (change_page_attr_addr((unsigned long)gatt, gatt_size >> PAGE_SHIFT, PAGE_KERNEL_NOCACHE)) + if (change_page_attr_addr((unsigned long)gatt, gatt_size >> PAGE_SHIFT, + PAGE_KERNEL_NOCACHE)) panic("Could not set GART PTEs to uncacheable pages"); global_flush_tlb(); - memset(gatt, 0, gatt_size); + memset(gatt, 0, gatt_size); agp_gatt_table = gatt; for (i = 0; i < num_k8_northbridges; i++) { - u32 ctl; - u32 gatt_reg; + u32 gatt_reg; + u32 ctl; dev = k8_northbridges[i]; - gatt_reg = __pa(gatt) >> 12; - gatt_reg <<= 4; + gatt_reg = __pa(gatt) >> 12; + gatt_reg <<= 4; pci_write_config_dword(dev, 0x98, gatt_reg); - pci_read_config_dword(dev, 0x90, &ctl); + pci_read_config_dword(dev, 0x90, &ctl); ctl |= 1; ctl &= ~((1<<4) | (1<<5)); - pci_write_config_dword(dev, 0x90, ctl); + pci_write_config_dword(dev, 0x90, ctl); } flush_gart(); - - printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); + + printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", + aper_base, aper_size>>10); return 0; nommu: - /* Should not happen anymore */ + /* Should not happen anymore */ printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n" KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.\n"); - return -1; -} + return -1; +} extern int agp_amd64_init(void); static const struct dma_mapping_ops gart_dma_ops = { - .mapping_error = NULL, - .map_single = gart_map_single, - .map_simple = gart_map_simple, - .unmap_single = gart_unmap_single, - .sync_single_for_cpu = NULL, - .sync_single_for_device = NULL, - .sync_single_range_for_cpu = NULL, - .sync_single_range_for_device = NULL, - .sync_sg_for_cpu = NULL, - .sync_sg_for_device = NULL, - .map_sg = gart_map_sg, - .unmap_sg = gart_unmap_sg, + .mapping_error = NULL, + .map_single = gart_map_single, + .map_simple = gart_map_simple, + .unmap_single = gart_unmap_single, + .sync_single_for_cpu = NULL, + .sync_single_for_device = NULL, + .sync_single_range_for_cpu = NULL, + .sync_single_range_for_device = NULL, + .sync_sg_for_cpu = NULL, + .sync_sg_for_device = NULL, + .map_sg = gart_map_sg, + .unmap_sg = gart_unmap_sg, }; void gart_iommu_shutdown(void) @@ -588,23 +631,23 @@ void 
gart_iommu_shutdown(void) if (no_agp && (dma_ops != &gart_dma_ops)) return; - for (i = 0; i < num_k8_northbridges; i++) { - u32 ctl; + for (i = 0; i < num_k8_northbridges; i++) { + u32 ctl; - dev = k8_northbridges[i]; - pci_read_config_dword(dev, 0x90, &ctl); + dev = k8_northbridges[i]; + pci_read_config_dword(dev, 0x90, &ctl); - ctl &= ~1; + ctl &= ~1; - pci_write_config_dword(dev, 0x90, ctl); - } + pci_write_config_dword(dev, 0x90, ctl); + } } void __init gart_iommu_init(void) -{ +{ struct agp_kern_info info; - unsigned long aper_size; unsigned long iommu_start; + unsigned long aper_size; unsigned long scratch; long i; @@ -614,14 +657,14 @@ void __init gart_iommu_init(void) } #ifndef CONFIG_AGP_AMD64 - no_agp = 1; + no_agp = 1; #else /* Makefile puts PCI initialization via subsys_initcall first. */ /* Add other K8 AGP bridge drivers here */ - no_agp = no_agp || - (agp_amd64_init() < 0) || + no_agp = no_agp || + (agp_amd64_init() < 0) || (agp_copy_info(agp_bridge, &info) < 0); -#endif +#endif if (swiotlb) return; @@ -643,77 +686,78 @@ void __init gart_iommu_init(void) } printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); - aper_size = info.aper_size * 1024 * 1024; - iommu_size = check_iommu_size(info.aper_base, aper_size); - iommu_pages = iommu_size >> PAGE_SHIFT; - - iommu_gart_bitmap = (void*)__get_free_pages(GFP_KERNEL, - get_order(iommu_pages/8)); - if (!iommu_gart_bitmap) - panic("Cannot allocate iommu bitmap\n"); + aper_size = info.aper_size * 1024 * 1024; + iommu_size = check_iommu_size(info.aper_base, aper_size); + iommu_pages = iommu_size >> PAGE_SHIFT; + + iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL, + get_order(iommu_pages/8)); + if (!iommu_gart_bitmap) + panic("Cannot allocate iommu bitmap\n"); memset(iommu_gart_bitmap, 0, iommu_pages/8); #ifdef CONFIG_IOMMU_LEAK - if (leak_trace) { - iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, + if (leak_trace) { + iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, get_order(iommu_pages*sizeof(void *))); - if (iommu_leak_tab) - memset(iommu_leak_tab, 0, iommu_pages * 8); + if (iommu_leak_tab) + memset(iommu_leak_tab, 0, iommu_pages * 8); else - printk("PCI-DMA: Cannot allocate leak trace area\n"); - } + printk(KERN_DEBUG + "PCI-DMA: Cannot allocate leak trace area\n"); + } #endif - /* + /* * Out of IOMMU space handling. - * Reserve some invalid pages at the beginning of the GART. - */ - set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); + * Reserve some invalid pages at the beginning of the GART. + */ + set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); - agp_memory_reserved = iommu_size; + agp_memory_reserved = iommu_size; printk(KERN_INFO "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", - iommu_size>>20); + iommu_size >> 20); - iommu_start = aper_size - iommu_size; - iommu_bus_base = info.aper_base + iommu_start; + iommu_start = aper_size - iommu_size; + iommu_bus_base = info.aper_base + iommu_start; bad_dma_address = iommu_bus_base; iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); - /* + /* * Unmap the IOMMU part of the GART. The alias of the page is * always mapped with cache enabled and there is no full cache * coherency across the GART remapping. The unmapping avoids * automatic prefetches from the CPU allocating cache lines in * there. All CPU accesses are done via the direct mapping to * the backing memory. The GART address is only used by PCI - * devices. + * devices. 
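 * (This is what the clear_kernel_mapping() call below does: it drops
 * the direct-mapping alias of the IOMMU window.)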
*/ clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size); - /* - * Try to workaround a bug (thanks to BenH) - * Set unmapped entries to a scratch page instead of 0. + /* + * Try to workaround a bug (thanks to BenH) + * Set unmapped entries to a scratch page instead of 0. * Any prefetches that hit unmapped entries won't get an bus abort * then. */ - scratch = get_zeroed_page(GFP_KERNEL); - if (!scratch) + scratch = get_zeroed_page(GFP_KERNEL); + if (!scratch) panic("Cannot allocate iommu scratch page"); gart_unmapped_entry = GPTE_ENCODE(__pa(scratch)); - for (i = EMERGENCY_PAGES; i < iommu_pages; i++) + for (i = EMERGENCY_PAGES; i < iommu_pages; i++) iommu_gatt_base[i] = gart_unmapped_entry; flush_gart(); dma_ops = &gart_dma_ops; -} +} void __init gart_parse_options(char *p) { int arg; #ifdef CONFIG_IOMMU_LEAK - if (!strncmp(p,"leak",4)) { + if (!strncmp(p, "leak", 4)) { leak_trace = 1; p += 4; if (*p == '=') ++p; @@ -723,18 +767,18 @@ void __init gart_parse_options(char *p) #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; - if (!strncmp(p, "fullflush",8)) + if (!strncmp(p, "fullflush", 8)) iommu_fullflush = 1; - if (!strncmp(p, "nofullflush",11)) + if (!strncmp(p, "nofullflush", 11)) iommu_fullflush = 0; - if (!strncmp(p,"noagp",5)) + if (!strncmp(p, "noagp", 5)) no_agp = 1; - if (!strncmp(p, "noaperture",10)) + if (!strncmp(p, "noaperture", 10)) fix_aperture = 0; /* duplicated from pci-dma.c */ - if (!strncmp(p,"force",5)) + if (!strncmp(p, "force", 5)) gart_iommu_aperture_allowed = 1; - if (!strncmp(p,"allowed",7)) + if (!strncmp(p, "allowed", 7)) gart_iommu_aperture_allowed = 1; if (!strncmp(p, "memaper", 7)) { fallback_aper_force = 1; -- cgit v1.2.3 From 2f36fa13ce49ffd000249feaedfcbefbcc83a72f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:12 +0100 Subject: x86: clean up arch/x86/kernel/e820_64.c White space and coding style cleanup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820_64.c | 305 +++++++++++++++++++++++++++------------------- 1 file changed, 177 insertions(+), 128 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 04698e0b056c..d41cd2f01733 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -1,4 +1,4 @@ -/* +/* * Handle the memory map. * The functions here do the job until bootmem takes over. * @@ -29,44 +29,44 @@ struct e820map e820; -/* +/* * PFN of last memory page. */ -unsigned long end_pfn; +unsigned long end_pfn; EXPORT_SYMBOL(end_pfn); -/* +/* * end_pfn only includes RAM, while end_pfn_map includes all e820 entries. * The direct mapping extends to end_pfn_map, so that we can directly access * apertures, ACPI and other tables without having to play with fixmaps. - */ -unsigned long end_pfn_map; + */ +unsigned long end_pfn_map; -/* +/* * Last pfn which the user wants to use. 
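 * (Defaults to MAXMEM>>PAGE_SHIFT and is lowered by the "mem=" early
 * parameter; e820_end_of_ram() clips end_pfn against it.)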
*/ static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; extern struct resource code_resource, data_resource, bss_resource; -/* Check for some hardcoded bad areas that early boot is not allowed to touch */ +/* Check for some hardcoded bad areas that early boot is not allowed to touch */ static inline int bad_addr(unsigned long *addrp, unsigned long size) -{ - unsigned long addr = *addrp, last = addr + size; +{ + unsigned long addr = *addrp, last = addr + size; /* various gunk below that needed for SMP startup */ - if (addr < 0x8000) { + if (addr < 0x8000) { *addrp = PAGE_ALIGN(0x8000); - return 1; + return 1; } /* direct mapping tables of the kernel */ - if (last >= table_start<= table_start<= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) { @@ -97,9 +97,9 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) return 1; } #endif - /* XXX ramdisk image here? */ + /* XXX ramdisk image here? */ return 0; -} +} /* * This function checks if any part of the range is mapped @@ -107,16 +107,18 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) */ int e820_any_mapped(unsigned long start, unsigned long end, unsigned type) -{ +{ int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (type && ei->type != type) continue; if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - return 1; - } + continue; + return 1; + } return 0; } EXPORT_SYMBOL_GPL(e820_any_mapped); @@ -127,11 +129,14 @@ EXPORT_SYMBOL_GPL(e820_any_mapped); * Note: this function only works correct if the e820 table is sorted and * not-overlapping, which is the case */ -int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type) +int __init e820_all_mapped(unsigned long start, unsigned long end, + unsigned type) { int i; + for (i = 0; i < e820.nr_map; i++) { struct e820entry *ei = &e820.map[i]; + if (type && ei->type != type) continue; /* is the region (part) in overlap with the current region ?*/ @@ -143,59 +148,66 @@ int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type */ if (ei->addr <= start) start = ei->addr + ei->size; - /* if start is now at or beyond end, we're done, full coverage */ + /* + * if start is now at or beyond end, we're done, full + * coverage + */ if (start >= end) - return 1; /* we're done */ + return 1; } return 0; } -/* - * Find a free area in a specific range. - */ -unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsigned size) -{ - int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long addr = ei->addr, last; - if (ei->type != E820_RAM) - continue; - if (addr < start) +/* + * Find a free area in a specific range. 
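 * (Walks the e820 map, considers only E820_RAM entries, steps the
 * candidate address past bad_addr() ranges, and returns the first fit
 * below 'end', or -1UL when nothing matches.)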
+ */ +unsigned long __init find_e820_area(unsigned long start, unsigned long end, + unsigned size) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + unsigned long addr = ei->addr, last; + + if (ei->type != E820_RAM) + continue; + if (addr < start) addr = start; - if (addr > ei->addr + ei->size) - continue; + if (addr > ei->addr + ei->size) + continue; while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size) ; last = PAGE_ALIGN(addr) + size; if (last > ei->addr + ei->size) continue; - if (last > end) + if (last > end) continue; - return addr; - } - return -1UL; -} + return addr; + } + return -1UL; +} /* * Find the highest page frame number we have available */ unsigned long __init e820_end_of_ram(void) { - unsigned long end_pfn = 0; + unsigned long end_pfn; + end_pfn = find_max_pfn_with_active_regions(); - - if (end_pfn > end_pfn_map) + + if (end_pfn > end_pfn_map) end_pfn_map = end_pfn; if (end_pfn_map > MAXMEM>>PAGE_SHIFT) end_pfn_map = MAXMEM>>PAGE_SHIFT; if (end_pfn > end_user_pfn) end_pfn = end_user_pfn; - if (end_pfn > end_pfn_map) - end_pfn = end_pfn_map; + if (end_pfn > end_pfn_map) + end_pfn = end_pfn_map; - printk("end_pfn_map = %lu\n", end_pfn_map); - return end_pfn; + printk(KERN_INFO "end_pfn_map = %lu\n", end_pfn_map); + return end_pfn; } /* @@ -219,9 +231,9 @@ void __init e820_reserve_resources(void) request_resource(&iomem_resource, res); if (e820.map[i].type == E820_RAM) { /* - * We don't know which RAM region contains kernel data, - * so we try it repeatedly and let the resource manager - * test it. + * We don't know which RAM region contains kernel data, + * so we try it repeatedly and let the resource manager + * test it. */ request_resource(res, &code_resource); request_resource(res, &data_resource); @@ -322,9 +334,9 @@ e820_register_active_regions(int nid, unsigned long start_pfn, add_active_range(nid, ei_startpfn, ei_endpfn); } -/* +/* * Add a memory region to the kernel e820 map. - */ + */ void __init add_memory_region(unsigned long start, unsigned long size, int type) { int x = e820.nr_map; @@ -349,9 +361,7 @@ unsigned long __init e820_hole_size(unsigned long start, unsigned long end) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long end_pfn = end >> PAGE_SHIFT; - unsigned long ei_startpfn; - unsigned long ei_endpfn; - unsigned long ram = 0; + unsigned long ei_startpfn, ei_endpfn, ram = 0; int i; for (i = 0; i < e820.nr_map; i++) { @@ -369,22 +379,25 @@ void __init e820_print_map(char *who) for (i = 0; i < e820.nr_map; i++) { printk(KERN_INFO " %s: %016Lx - %016Lx ", who, - (unsigned long long) e820.map[i].addr, - (unsigned long long) (e820.map[i].addr + e820.map[i].size)); + (unsigned long long) e820.map[i].addr, + (unsigned long long) + (e820.map[i].addr + e820.map[i].size)); switch (e820.map[i].type) { - case E820_RAM: printk("(usable)\n"); - break; + case E820_RAM: + printk(KERN_CONT "(usable)\n"); + break; case E820_RESERVED: - printk("(reserved)\n"); - break; + printk(KERN_CONT "(reserved)\n"); + break; case E820_ACPI: - printk("(ACPI data)\n"); - break; + printk(KERN_CONT "(ACPI data)\n"); + break; case E820_NVS: - printk("(ACPI NVS)\n"); - break; - default: printk("type %u\n", e820.map[i].type); - break; + printk(KERN_CONT "(ACPI NVS)\n"); + break; + default: + printk(KERN_CONT "type %u\n", e820.map[i].type); + break; } } } @@ -392,11 +405,11 @@ void __init e820_print_map(char *who) /* * Sanitize the BIOS e820 map. * - * Some e820 responses include overlapping entries. 
The following + * Some e820 responses include overlapping entries. The following * replaces the original e820 map with a new one, removing overlaps. * */ -static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) +static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map) { struct change_member { struct e820entry *pbios; /* pointer to original bios entry */ @@ -416,7 +429,8 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) int i; /* - Visually we're performing the following (1,2,3,4 = memory types)... + Visually we're performing the following + (1,2,3,4 = memory types)... Sample memory map (w/overlaps): ____22__________________ @@ -458,22 +472,23 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) old_nr = *pnr_map; /* bail out if we find any unreasonable addresses in bios map */ - for (i=0; iaddr = biosmap[i].addr; change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; + change_point[chgidx]->addr = biosmap[i].addr + + biosmap[i].size; change_point[chgidx++]->pbios = &biosmap[i]; } } @@ -483,75 +498,106 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) still_changing = 1; while (still_changing) { still_changing = 0; - for (i=1; i < chg_nr; i++) { - /* if > , swap */ - /* or, if current= & last=, swap */ - if ((change_point[i]->addr < change_point[i-1]->addr) || - ((change_point[i]->addr == change_point[i-1]->addr) && - (change_point[i]->addr == change_point[i]->pbios->addr) && - (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) - ) - { + for (i = 1; i < chg_nr; i++) { + unsigned long long curaddr, lastaddr; + unsigned long long curpbaddr, lastpbaddr; + + curaddr = change_point[i]->addr; + lastaddr = change_point[i - 1]->addr; + curpbaddr = change_point[i]->pbios->addr; + lastpbaddr = change_point[i - 1]->pbios->addr; + + /* + * swap entries, when: + * + * curaddr > lastaddr or + * curaddr == lastaddr and curaddr == curpbaddr and + * lastaddr != lastpbaddr + */ + if (curaddr < lastaddr || + (curaddr == lastaddr && curaddr == curpbaddr && + lastaddr != lastpbaddr)) { change_tmp = change_point[i]; change_point[i] = change_point[i-1]; change_point[i-1] = change_tmp; - still_changing=1; + still_changing = 1; } } } /* create a new bios memory map, removing overlaps */ - overlap_entries=0; /* number of entries in the overlap table */ - new_bios_entry=0; /* index for creating new bios map entries */ + overlap_entries = 0; /* number of entries in the overlap table */ + new_bios_entry = 0; /* index for creating new bios map entries */ last_type = 0; /* start with undefined memory type */ last_addr = 0; /* start with 0 as last starting address */ + /* loop through change-points, determining affect on the new bios map */ - for (chgidx=0; chgidx < chg_nr; chgidx++) - { + for (chgidx = 0; chgidx < chg_nr; chgidx++) { /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) - { - /* add map entry to overlap list (> 1 entry implies an overlap) */ - overlap_list[overlap_entries++]=change_point[chgidx]->pbios; - } - else - { - /* remove entry from list (order independent, so swap with last) */ - for (i=0; ipbios) - overlap_list[i] = overlap_list[overlap_entries-1]; + if (change_point[chgidx]->addr == + change_point[chgidx]->pbios->addr) { + /* + * add map entry to overlap list (> 1 entry + * implies an overlap) + */ + 
overlap_list[overlap_entries++] = + change_point[chgidx]->pbios; + } else { + /* + * remove entry from list (order independent, + * so swap with last) + */ + for (i = 0; i < overlap_entries; i++) { + if (overlap_list[i] == + change_point[chgidx]->pbios) + overlap_list[i] = + overlap_list[overlap_entries-1]; } overlap_entries--; } - /* if there are overlapping entries, decide which "type" to use */ - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ + /* + * if there are overlapping entries, decide which + * "type" to use (larger value takes precedence -- + * 1=usable, 2,3,4,4+=unusable) + */ current_type = 0; - for (i=0; itype > current_type) current_type = overlap_list[i]->type; - /* continue building up new bios map based on this information */ + /* + * continue building up new bios map based on this + * information + */ if (current_type != last_type) { if (last_type != 0) { new_bios[new_bios_entry].size = change_point[chgidx]->addr - last_addr; - /* move forward only if the new size was non-zero */ + /* + * move forward only if the new size + * was non-zero + */ if (new_bios[new_bios_entry].size != 0) + /* + * no more space left for new + * bios entries ? + */ if (++new_bios_entry >= E820MAX) - break; /* no more space left for new bios entries */ + break; } if (current_type != 0) { - new_bios[new_bios_entry].addr = change_point[chgidx]->addr; + new_bios[new_bios_entry].addr = + change_point[chgidx]->addr; new_bios[new_bios_entry].type = current_type; - last_addr=change_point[chgidx]->addr; + last_addr = change_point[chgidx]->addr; } last_type = current_type; } } - new_nr = new_bios_entry; /* retain count for new bios entries */ + /* retain count for new bios entries */ + new_nr = new_bios_entry; /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); + memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry)); *pnr_map = new_nr; return 0; @@ -566,7 +612,7 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) * will have given us a memory map that we can use to properly * set up memory. If we aren't, we'll fake a memory map. */ -static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) +static int __init copy_e820_map(struct e820entry *biosmap, int nr_map) { /* Only one memory region (or negative)? Ignore it */ if (nr_map < 2) @@ -583,7 +629,7 @@ static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) return -1; add_memory_region(start, size, type); - } while (biosmap++,--nr_map); + } while (biosmap++, --nr_map); return 0; } @@ -613,9 +659,9 @@ static int __init parse_memopt(char *p) if (!p) return -EINVAL; end_user_pfn = memparse(p, &p); - end_user_pfn >>= PAGE_SHIFT; + end_user_pfn >>= PAGE_SHIFT; return 0; -} +} early_param("mem", parse_memopt); static int userdef __initdata; @@ -627,9 +673,9 @@ static int __init parse_memmap_opt(char *p) if (!strcmp(p, "exactmap")) { #ifdef CONFIG_CRASH_DUMP - /* If we are doing a crash dump, we - * still need to know the real mem - * size before original memory map is + /* + * If we are doing a crash dump, we still need to know + * the real mem size before original memory map is * reset. 
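 * (Hence the e820_register_active_regions(0, 0, -1UL) call right below,
 * which records the current RAM ranges before the map is thrown away.)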
*/ e820_register_active_regions(0, 0, -1UL); @@ -713,8 +759,10 @@ __init void e820_setup_gap(void) if (!found) { gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024; - printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n" - KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n"); + printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit " + "address range\n" + KERN_ERR "PCI: Unassigned devices with 32bit resource " + "registers may break!\n"); } /* @@ -727,8 +775,9 @@ __init void e820_setup_gap(void) /* Fun with two's complement */ pci_mem_start = (gapstart + round) & -round; - printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", - pci_mem_start, gapstart, gapsize); + printk(KERN_INFO + "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", + pci_mem_start, gapstart, gapsize); } int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) -- cgit v1.2.3 From 78aa1f66f77da078357bd263fcac95fbf6bca15b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:13 +0100 Subject: x86: clean up arch/x86/kernel/ldt_32/64.c White space and coding style clenaup. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/ldt_32.c | 60 ++++++++++++++++++++++------------------- arch/x86/kernel/ldt_64.c | 69 +++++++++++++++++++++++++----------------------- 2 files changed, 69 insertions(+), 60 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ldt_32.c b/arch/x86/kernel/ldt_32.c index 9ff90a27c45f..e366c5fd0d19 100644 --- a/arch/x86/kernel/ldt_32.c +++ b/arch/x86/kernel/ldt_32.c @@ -17,7 +17,7 @@ #include #include -#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ +#ifdef CONFIG_SMP static void flush_ldt(void *null) { if (current->active_mm) @@ -34,19 +34,20 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) if (mincount <= pc->size) return 0; oldsize = pc->size; - mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); + mincount = (mincount + 511) & (~511); + if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) + newldt = vmalloc(mincount * LDT_ENTRY_SIZE); else - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); + newldt = kmalloc(mincount * LDT_ENTRY_SIZE, GFP_KERNEL); if (!newldt) return -ENOMEM; if (oldsize) - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); + memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); oldldt = pc->ldt; - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); + memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, + (mincount - oldsize) * LDT_ENTRY_SIZE); pc->ldt = newldt; wmb(); pc->size = mincount; @@ -55,6 +56,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) if (reload) { #ifdef CONFIG_SMP cpumask_t mask; + preempt_disable(); load_LDT(pc); mask = cpumask_of_cpu(smp_processor_id()); @@ -66,7 +68,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) #endif } if (oldsize) { - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) + if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) vfree(oldldt); else kfree(oldldt); @@ -77,9 +79,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) static inline int copy_ldt(mm_context_t *new, mm_context_t *old) { int err = alloc_ldt(new, old->size, 0); + if (err < 0) return err; - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); + memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE); return 0; } @@ -89,7 +92,7 @@ static inline int copy_ldt(mm_context_t *new, 
mm_context_t *old) */ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { - struct mm_struct * old_mm; + struct mm_struct *old_mm; int retval = 0; mutex_init(&mm->context.lock); @@ -111,7 +114,7 @@ void destroy_context(struct mm_struct *mm) if (mm->context.size) { if (mm == current->active_mm) clear_LDT(); - if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) + if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) vfree(mm->context.ldt); else kfree(mm->context.ldt); @@ -119,19 +122,19 @@ void destroy_context(struct mm_struct *mm) } } -static int read_ldt(void __user * ptr, unsigned long bytecount) +static int read_ldt(void __user *ptr, unsigned long bytecount) { int err; unsigned long size; - struct mm_struct * mm = current->mm; + struct mm_struct *mm = current->mm; if (!mm->context.size) return 0; - if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) - bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; + if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) + bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; mutex_lock(&mm->context.lock); - size = mm->context.size*LDT_ENTRY_SIZE; + size = mm->context.size * LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; @@ -143,7 +146,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount) goto error_return; if (size != bytecount) { /* zero-fill the rest */ - if (clear_user(ptr+size, bytecount-size) != 0) { + if (clear_user(ptr + size, bytecount - size) != 0) { err = -EFAULT; goto error_return; } @@ -153,13 +156,13 @@ error_return: return err; } -static int read_default_ldt(void __user * ptr, unsigned long bytecount) +static int read_default_ldt(void __user *ptr, unsigned long bytecount) { int err; unsigned long size; err = 0; - size = 5*sizeof(struct desc_struct); + size = 5 * sizeof(struct desc_struct); if (size > bytecount) size = bytecount; @@ -170,9 +173,9 @@ static int read_default_ldt(void __user * ptr, unsigned long bytecount) return err; } -static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) +static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) { - struct mm_struct * mm = current->mm; + struct mm_struct *mm = current->mm; __u32 entry_1, entry_2; int error; struct user_desc ldt_info; @@ -180,7 +183,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) error = -EINVAL; if (bytecount != sizeof(ldt_info)) goto out; - error = -EFAULT; + error = -EFAULT; if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) goto out; @@ -196,13 +199,14 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) mutex_lock(&mm->context.lock); if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); + error = alloc_ldt(¤t->mm->context, + ldt_info.entry_number + 1, 1); if (error < 0) goto out_unlock; } - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { + /* Allow LDTs to be cleared by the user. */ + if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { if (oldmode || LDT_empty(&ldt_info)) { entry_1 = 0; entry_2 = 0; @@ -217,7 +221,8 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) /* Install the new entry ... 
*/ install: - write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, entry_2); + write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, + entry_2); error = 0; out_unlock: @@ -226,7 +231,8 @@ out: return error; } -asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) +asmlinkage int sys_modify_ldt(int func, void __user *ptr, + unsigned long bytecount) { int ret = -ENOSYS; diff --git a/arch/x86/kernel/ldt_64.c b/arch/x86/kernel/ldt_64.c index 60e57abb8e90..6c41db367de8 100644 --- a/arch/x86/kernel/ldt_64.c +++ b/arch/x86/kernel/ldt_64.c @@ -2,7 +2,7 @@ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds * Copyright (C) 1999 Ingo Molnar * Copyright (C) 2002 Andi Kleen - * + * * This handles calls from both 32bit and 64bit mode. */ @@ -20,11 +20,11 @@ #include #include -#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ +#ifdef CONFIG_SMP static void flush_ldt(void *null) { if (current->active_mm) - load_LDT(¤t->active_mm->context); + load_LDT(¤t->active_mm->context); } #endif @@ -37,19 +37,20 @@ static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload) if (mincount <= (unsigned)pc->size) return 0; oldsize = pc->size; - mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); + mincount = (mincount + 511) & (~511); + if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) + newldt = vmalloc(mincount * LDT_ENTRY_SIZE); else - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); + newldt = kmalloc(mincount * LDT_ENTRY_SIZE, GFP_KERNEL); if (!newldt) return -ENOMEM; if (oldsize) - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); + memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); oldldt = pc->ldt; - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); + memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, + (mincount - oldsize) * LDT_ENTRY_SIZE); wmb(); pc->ldt = newldt; wmb(); @@ -70,7 +71,7 @@ static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload) #endif } if (oldsize) { - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) + if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) vfree(oldldt); else kfree(oldldt); @@ -81,9 +82,10 @@ static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload) static inline int copy_ldt(mm_context_t *new, mm_context_t *old) { int err = alloc_ldt(new, old->size, 0); + if (err < 0) return err; - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); + memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE); return 0; } @@ -93,7 +95,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) */ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { - struct mm_struct * old_mm; + struct mm_struct *old_mm; int retval = 0; mutex_init(&mm->context.lock); @@ -108,13 +110,12 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) } /* - * * Don't touch the LDT register - we're already in the next thread. 
*/ void destroy_context(struct mm_struct *mm) { if (mm->context.size) { - if ((unsigned)mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) + if ((unsigned)mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) vfree(mm->context.ldt); else kfree(mm->context.ldt); @@ -122,19 +123,19 @@ void destroy_context(struct mm_struct *mm) } } -static int read_ldt(void __user * ptr, unsigned long bytecount) +static int read_ldt(void __user *ptr, unsigned long bytecount) { int err; unsigned long size; - struct mm_struct * mm = current->mm; + struct mm_struct *mm = current->mm; if (!mm->context.size) return 0; - if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) - bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; + if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) + bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; mutex_lock(&mm->context.lock); - size = mm->context.size*LDT_ENTRY_SIZE; + size = mm->context.size * LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; @@ -146,7 +147,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount) goto error_return; if (size != bytecount) { /* zero-fill the rest */ - if (clear_user(ptr+size, bytecount-size) != 0) { + if (clear_user(ptr + size, bytecount - size) != 0) { err = -EFAULT; goto error_return; } @@ -156,21 +157,21 @@ error_return: return err; } -static int read_default_ldt(void __user * ptr, unsigned long bytecount) +static int read_default_ldt(void __user *ptr, unsigned long bytecount) { - /* Arbitrary number */ + /* Arbitrary number */ /* x86-64 default LDT is all zeros */ - if (bytecount > 128) - bytecount = 128; + if (bytecount > 128) + bytecount = 128; if (clear_user(ptr, bytecount)) return -EFAULT; - return bytecount; + return bytecount; } -static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) +static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) { struct task_struct *me = current; - struct mm_struct * mm = me->mm; + struct mm_struct *mm = me->mm; __u32 entry_1, entry_2, *lp; int error; struct user_desc ldt_info; @@ -179,7 +180,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) if (bytecount != sizeof(ldt_info)) goto out; - error = -EFAULT; + error = -EFAULT; if (copy_from_user(&ldt_info, ptr, bytecount)) goto out; @@ -195,15 +196,16 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) mutex_lock(&mm->context.lock); if (ldt_info.entry_number >= (unsigned)mm->context.size) { - error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); + error = alloc_ldt(¤t->mm->context, + ldt_info.entry_number + 1, 1); if (error < 0) goto out_unlock; } - lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); + lp = (__u32 *)((ldt_info.entry_number << 3) + (char *)mm->context.ldt); - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { + /* Allow LDTs to be cleared by the user. */ + if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { if (oldmode || LDT_empty(&ldt_info)) { entry_1 = 0; entry_2 = 0; @@ -228,7 +230,8 @@ out: return error; } -asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) +asmlinkage int sys_modify_ldt(int func, void __user *ptr, + unsigned long bytecount) { int ret = -ENOSYS; -- cgit v1.2.3 From fc2d625c4fac18e672a3b7c61af5de22d7ab7d87 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:13 +0100 Subject: x86: introduce ldt_write accessor Create a ldt write accessor like the 32 bit one. 
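As a rough sketch of what the accessor does, expanded here for illustration from the diff below rather than quoted from kernel headers: an LDT entry is 8 bytes, i.e. two 32-bit words, so entry N sits at byte offset N << 3 from the table base and the accessor simply stores the two halves.

static inline void write_ldt_entry(struct desc_struct *ldt, int entry,
				   u32 entry_low, u32 entry_high)
{
	/* each descriptor is two consecutive 32-bit words */
	u32 *lp = (u32 *)((entry << 3) + (char *)ldt);

	lp[0] = entry_low;	/* limit 15:0 and base 15:0 */
	lp[1] = entry_high;	/* base 23:16/31:24, attributes, limit 19:16 */
}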
Preparatory patch for merging ldt.c and anyway necessary for 64bit paravirt ops. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/ldt_64.c | 8 +++----- include/asm-x86/desc_64.h | 9 +++++++++ 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ldt_64.c b/arch/x86/kernel/ldt_64.c index 6c41db367de8..d72dc7a0636f 100644 --- a/arch/x86/kernel/ldt_64.c +++ b/arch/x86/kernel/ldt_64.c @@ -172,7 +172,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) { struct task_struct *me = current; struct mm_struct *mm = me->mm; - __u32 entry_1, entry_2, *lp; + __u32 entry_1, entry_2; int error; struct user_desc ldt_info; @@ -202,8 +202,6 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) goto out_unlock; } - lp = (__u32 *)((ldt_info.entry_number << 3) + (char *)mm->context.ldt); - /* Allow LDTs to be cleared by the user. */ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { if (oldmode || LDT_empty(&ldt_info)) { @@ -220,8 +218,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) /* Install the new entry ... */ install: - *lp = entry_1; - *(lp+1) = entry_2; + write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, + entry_2); error = 0; out_unlock: diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h index bb2009ecbbca..7d48df72aef2 100644 --- a/include/asm-x86/desc_64.h +++ b/include/asm-x86/desc_64.h @@ -38,6 +38,15 @@ extern struct desc_struct default_ldt[]; extern struct gate_struct idt_table[]; extern struct desc_ptr cpu_gdt_descr[]; +static inline void write_ldt_entry(struct desc_struct *ldt, + int entry, u32 entry_low, u32 entry_high) +{ + __u32 *lp = (__u32 *)((entry << 3) + (char *)ldt); + + lp[0] = entry_low; + lp[1] = entry_high; +} + /* the cpu gdt accessor */ #define cpu_gdt(_cpu) ((struct desc_struct *)cpu_gdt_descr[_cpu].address) -- cgit v1.2.3 From 70f5088dd5e9fbd3a71b3a5b01395c676158194b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:13 +0100 Subject: x86: prepare arch/x86/kernel/ldt_32/64.c for merging White space and coding style cleanups. Change unsigned to int. There is no win when we compare mincount against pc->size, which is an int as well. Casting pc->size to unsigned just might hide real problems. 
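The signed/unsigned pitfall described above is easy to demonstrate; a minimal userspace sketch (illustrative only, not kernel code):

#include <stdio.h>

int main(void)
{
	int size = -1;			/* a bogus, negative table size */
	unsigned int mincount = 16;

	/*
	 * size is converted to unsigned for the comparison, so -1
	 * becomes UINT_MAX and this branch is taken even though
	 * "16 <= -1" is nonsense.
	 */
	if (mincount <= size)
		printf("unsigned comparison hides the bad size\n");

	/* with both operands signed, the comparison behaves as intended */
	if ((int)mincount <= size)
		printf("never reached\n");

	return 0;
}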
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/ldt_32.c | 3 +-- arch/x86/kernel/ldt_64.c | 24 +++++++++++------------- 2 files changed, 12 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ldt_32.c b/arch/x86/kernel/ldt_32.c index e366c5fd0d19..bb15753abaf2 100644 --- a/arch/x86/kernel/ldt_32.c +++ b/arch/x86/kernel/ldt_32.c @@ -27,8 +27,7 @@ static void flush_ldt(void *null) static int alloc_ldt(mm_context_t *pc, int mincount, int reload) { - void *oldldt; - void *newldt; + void *oldldt, *newldt; int oldsize; if (mincount <= pc->size) diff --git a/arch/x86/kernel/ldt_64.c b/arch/x86/kernel/ldt_64.c index d72dc7a0636f..95903938e7ad 100644 --- a/arch/x86/kernel/ldt_64.c +++ b/arch/x86/kernel/ldt_64.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #ifdef CONFIG_SMP static void flush_ldt(void *null) @@ -28,13 +28,12 @@ static void flush_ldt(void *null) } #endif -static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload) +static int alloc_ldt(mm_context_t *pc, int mincount, int reload) { - void *oldldt; - void *newldt; - unsigned oldsize; + void *oldldt, *newldt; + int oldsize; - if (mincount <= (unsigned)pc->size) + if (mincount <= pc->size) return 0; oldsize = pc->size; mincount = (mincount + 511) & (~511); @@ -56,13 +55,14 @@ static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload) wmb(); pc->size = mincount; wmb(); + if (reload) { #ifdef CONFIG_SMP cpumask_t mask; preempt_disable(); - mask = cpumask_of_cpu(smp_processor_id()); load_LDT(pc); + mask = cpumask_of_cpu(smp_processor_id()); if (!cpus_equal(current->mm->cpu_vm_mask, mask)) smp_call_function(flush_ldt, NULL, 1, 1); preempt_enable(); @@ -115,7 +115,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) void destroy_context(struct mm_struct *mm) { if (mm->context.size) { - if ((unsigned)mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) + if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) vfree(mm->context.ldt); else kfree(mm->context.ldt); @@ -170,18 +170,16 @@ static int read_default_ldt(void __user *ptr, unsigned long bytecount) static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) { - struct task_struct *me = current; - struct mm_struct *mm = me->mm; + struct mm_struct *mm = current->mm; __u32 entry_1, entry_2; int error; struct user_desc ldt_info; error = -EINVAL; - if (bytecount != sizeof(ldt_info)) goto out; error = -EFAULT; - if (copy_from_user(&ldt_info, ptr, bytecount)) + if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) goto out; error = -EINVAL; @@ -195,7 +193,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) } mutex_lock(&mm->context.lock); - if (ldt_info.entry_number >= (unsigned)mm->context.size) { + if (ldt_info.entry_number >= mm->context.size) { error = alloc_ldt(¤t->mm->context, ldt_info.entry_number + 1, 1); if (error < 0) -- cgit v1.2.3 From 77e463d1040d6310211ac5162729f5d4afc4dd8c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:14 +0100 Subject: x86: merge arch/x86/kernel/ldt_32/64.c Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile_32 | 2 +- arch/x86/kernel/Makefile_64 | 2 +- arch/x86/kernel/ldt.c | 264 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/ldt_32.c | 253 ------------------------------------------ arch/x86/kernel/ldt_64.c | 249 ----------------------------------------- 5 files changed, 266 insertions(+), 504 deletions(-) create mode 100644 
arch/x86/kernel/ldt.c delete mode 100644 arch/x86/kernel/ldt_32.c delete mode 100644 arch/x86/kernel/ldt_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32 index 0cc1981d1e38..31ff982bc26b 100644 --- a/arch/x86/kernel/Makefile_32 +++ b/arch/x86/kernel/Makefile_32 @@ -6,7 +6,7 @@ extra-y := head_32.o init_task.o vmlinux.lds CPPFLAGS_vmlinux.lds += -Ui386 obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ - ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \ + ptrace_32.o time_32.o ioport_32.o ldt.o setup_32.o i8259_32.o sys_i386_32.o \ pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\ quirks.o i8237.o topology.o alternative.o i8253.o tsc_32.o io_delay.o diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64 index 08a68f0d8fda..9cb3df27c413 100644 --- a/arch/x86/kernel/Makefile_64 +++ b/arch/x86/kernel/Makefile_64 @@ -7,7 +7,7 @@ CPPFLAGS_vmlinux.lds += -Ux86_64 EXTRA_AFLAGS := -traditional obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \ - ptrace_64.o time_64.o ioport_64.o ldt_64.o setup_64.o i8259_64.o sys_x86_64.o \ + ptrace_64.o time_64.o ioport_64.o ldt.o setup_64.o i8259_64.o sys_x86_64.o \ x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \ setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \ pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \ diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c new file mode 100644 index 000000000000..a8cdca3615bf --- /dev/null +++ b/arch/x86/kernel/ldt.c @@ -0,0 +1,264 @@ +/* + * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds + * Copyright (C) 1999 Ingo Molnar + * Copyright (C) 2002 Andi Kleen + * + * This handles calls from both 32bit and 64bit mode. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef CONFIG_SMP +static void flush_ldt(void *null) +{ + if (current->active_mm) + load_LDT(¤t->active_mm->context); +} +#endif + +static int alloc_ldt(mm_context_t *pc, int mincount, int reload) +{ + void *oldldt, *newldt; + int oldsize; + + if (mincount <= pc->size) + return 0; + oldsize = pc->size; + mincount = (mincount + 511) & (~511); + if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) + newldt = vmalloc(mincount * LDT_ENTRY_SIZE); + else + newldt = kmalloc(mincount * LDT_ENTRY_SIZE, GFP_KERNEL); + + if (!newldt) + return -ENOMEM; + + if (oldsize) + memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); + oldldt = pc->ldt; + memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, + (mincount - oldsize) * LDT_ENTRY_SIZE); + +#ifdef CONFIG_X86_64 + /* CHECKME: Do we really need this ? */ + wmb(); +#endif + pc->ldt = newldt; + wmb(); + pc->size = mincount; + wmb(); + + if (reload) { +#ifdef CONFIG_SMP + cpumask_t mask; + + preempt_disable(); + load_LDT(pc); + mask = cpumask_of_cpu(smp_processor_id()); + if (!cpus_equal(current->mm->cpu_vm_mask, mask)) + smp_call_function(flush_ldt, NULL, 1, 1); + preempt_enable(); +#else + load_LDT(pc); +#endif + } + if (oldsize) { + if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) + vfree(oldldt); + else + kfree(oldldt); + } + return 0; +} + +static inline int copy_ldt(mm_context_t *new, mm_context_t *old) +{ + int err = alloc_ldt(new, old->size, 0); + + if (err < 0) + return err; + memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE); + return 0; +} + +/* + * we do not have to muck with descriptors here, that is + * done in switch_mm() as needed. 
+ */ +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + struct mm_struct *old_mm; + int retval = 0; + + mutex_init(&mm->context.lock); + mm->context.size = 0; + old_mm = current->mm; + if (old_mm && old_mm->context.size > 0) { + mutex_lock(&old_mm->context.lock); + retval = copy_ldt(&mm->context, &old_mm->context); + mutex_unlock(&old_mm->context.lock); + } + return retval; +} + +/* + * No need to lock the MM as we are the last user + * + * 64bit: Don't touch the LDT register - we're already in the next thread. + */ +void destroy_context(struct mm_struct *mm) +{ + if (mm->context.size) { +#ifdef CONFIG_X86_32 + /* CHECKME: Can this ever happen ? */ + if (mm == current->active_mm) + clear_LDT(); +#endif + if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) + vfree(mm->context.ldt); + else + kfree(mm->context.ldt); + mm->context.size = 0; + } +} + +static int read_ldt(void __user *ptr, unsigned long bytecount) +{ + int err; + unsigned long size; + struct mm_struct *mm = current->mm; + + if (!mm->context.size) + return 0; + if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) + bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; + + mutex_lock(&mm->context.lock); + size = mm->context.size * LDT_ENTRY_SIZE; + if (size > bytecount) + size = bytecount; + + err = 0; + if (copy_to_user(ptr, mm->context.ldt, size)) + err = -EFAULT; + mutex_unlock(&mm->context.lock); + if (err < 0) + goto error_return; + if (size != bytecount) { + /* zero-fill the rest */ + if (clear_user(ptr + size, bytecount - size) != 0) { + err = -EFAULT; + goto error_return; + } + } + return bytecount; +error_return: + return err; +} + +static int read_default_ldt(void __user *ptr, unsigned long bytecount) +{ + /* CHECKME: Can we use _one_ random number ? */ +#ifdef CONFIG_X86_32 + unsigned long size = 5 * sizeof(struct desc_struct); +#else + unsigned long size = 128; +#endif + if (bytecount > size) + bytecount = size; + if (clear_user(ptr, bytecount)) + return -EFAULT; + return bytecount; +} + +static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) +{ + struct mm_struct *mm = current->mm; + __u32 entry_1, entry_2; + int error; + struct user_desc ldt_info; + + error = -EINVAL; + if (bytecount != sizeof(ldt_info)) + goto out; + error = -EFAULT; + if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) + goto out; + + error = -EINVAL; + if (ldt_info.entry_number >= LDT_ENTRIES) + goto out; + if (ldt_info.contents == 3) { + if (oldmode) + goto out; + if (ldt_info.seg_not_present == 0) + goto out; + } + + mutex_lock(&mm->context.lock); + if (ldt_info.entry_number >= mm->context.size) { + error = alloc_ldt(¤t->mm->context, + ldt_info.entry_number + 1, 1); + if (error < 0) + goto out_unlock; + } + + /* Allow LDTs to be cleared by the user. */ + if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { + if (oldmode || LDT_empty(&ldt_info)) { + entry_1 = 0; + entry_2 = 0; + goto install; + } + } + + entry_1 = LDT_entry_a(&ldt_info); + entry_2 = LDT_entry_b(&ldt_info); + if (oldmode) + entry_2 &= ~(1 << 20); + + /* Install the new entry ... 
*/ +install: + write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, + entry_2); + error = 0; + +out_unlock: + mutex_unlock(&mm->context.lock); +out: + return error; +} + +asmlinkage int sys_modify_ldt(int func, void __user *ptr, + unsigned long bytecount) +{ + int ret = -ENOSYS; + + switch (func) { + case 0: + ret = read_ldt(ptr, bytecount); + break; + case 1: + ret = write_ldt(ptr, bytecount, 1); + break; + case 2: + ret = read_default_ldt(ptr, bytecount); + break; + case 0x11: + ret = write_ldt(ptr, bytecount, 0); + break; + } + return ret; +} diff --git a/arch/x86/kernel/ldt_32.c b/arch/x86/kernel/ldt_32.c deleted file mode 100644 index bb15753abaf2..000000000000 --- a/arch/x86/kernel/ldt_32.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#ifdef CONFIG_SMP -static void flush_ldt(void *null) -{ - if (current->active_mm) - load_LDT(¤t->active_mm->context); -} -#endif - -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) -{ - void *oldldt, *newldt; - int oldsize; - - if (mincount <= pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount + 511) & (~511); - if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount * LDT_ENTRY_SIZE); - else - newldt = kmalloc(mincount * LDT_ENTRY_SIZE, GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, - (mincount - oldsize) * LDT_ENTRY_SIZE); - pc->ldt = newldt; - wmb(); - pc->size = mincount; - wmb(); - - if (reload) { -#ifdef CONFIG_SMP - cpumask_t mask; - - preempt_disable(); - load_LDT(pc); - mask = cpumask_of_cpu(smp_processor_id()); - if (!cpus_equal(current->mm->cpu_vm_mask, mask)) - smp_call_function(flush_ldt, NULL, 1, 1); - preempt_enable(); -#else - load_LDT(pc); -#endif - } - if (oldsize) { - if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - kfree(oldldt); - } - return 0; -} - -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) -{ - int err = alloc_ldt(new, old->size, 0); - - if (err < 0) - return err; - memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE); - return 0; -} - -/* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. 
- */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - struct mm_struct *old_mm; - int retval = 0; - - mutex_init(&mm->context.lock); - mm->context.size = 0; - old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - mutex_lock(&old_mm->context.lock); - retval = copy_ldt(&mm->context, &old_mm->context); - mutex_unlock(&old_mm->context.lock); - } - return retval; -} - -/* - * No need to lock the MM as we are the last user - */ -void destroy_context(struct mm_struct *mm) -{ - if (mm->context.size) { - if (mm == current->active_mm) - clear_LDT(); - if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - kfree(mm->context.ldt); - mm->context.size = 0; - } -} - -static int read_ldt(void __user *ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - struct mm_struct *mm = current->mm; - - if (!mm->context.size) - return 0; - if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) - bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; - - mutex_lock(&mm->context.lock); - size = mm->context.size * LDT_ENTRY_SIZE; - if (size > bytecount) - size = bytecount; - - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - mutex_unlock(&mm->context.lock); - if (err < 0) - goto error_return; - if (size != bytecount) { - /* zero-fill the rest */ - if (clear_user(ptr + size, bytecount - size) != 0) { - err = -EFAULT; - goto error_return; - } - } - return bytecount; -error_return: - return err; -} - -static int read_default_ldt(void __user *ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - - err = 0; - size = 5 * sizeof(struct desc_struct); - if (size > bytecount) - size = bytecount; - - err = size; - if (clear_user(ptr, size)) - err = -EFAULT; - - return err; -} - -static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) -{ - struct mm_struct *mm = current->mm; - __u32 entry_1, entry_2; - int error; - struct user_desc ldt_info; - - error = -EINVAL; - if (bytecount != sizeof(ldt_info)) - goto out; - error = -EFAULT; - if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) - goto out; - - error = -EINVAL; - if (ldt_info.entry_number >= LDT_ENTRIES) - goto out; - if (ldt_info.contents == 3) { - if (oldmode) - goto out; - if (ldt_info.seg_not_present == 0) - goto out; - } - - mutex_lock(&mm->context.lock); - if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(¤t->mm->context, - ldt_info.entry_number + 1, 1); - if (error < 0) - goto out_unlock; - } - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || LDT_empty(&ldt_info)) { - entry_1 = 0; - entry_2 = 0; - goto install; - } - } - - entry_1 = LDT_entry_a(&ldt_info); - entry_2 = LDT_entry_b(&ldt_info); - if (oldmode) - entry_2 &= ~(1 << 20); - - /* Install the new entry ... 
*/ -install: - write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, - entry_2); - error = 0; - -out_unlock: - mutex_unlock(&mm->context.lock); -out: - return error; -} - -asmlinkage int sys_modify_ldt(int func, void __user *ptr, - unsigned long bytecount) -{ - int ret = -ENOSYS; - - switch (func) { - case 0: - ret = read_ldt(ptr, bytecount); - break; - case 1: - ret = write_ldt(ptr, bytecount, 1); - break; - case 2: - ret = read_default_ldt(ptr, bytecount); - break; - case 0x11: - ret = write_ldt(ptr, bytecount, 0); - break; - } - return ret; -} diff --git a/arch/x86/kernel/ldt_64.c b/arch/x86/kernel/ldt_64.c deleted file mode 100644 index 95903938e7ad..000000000000 --- a/arch/x86/kernel/ldt_64.c +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar - * Copyright (C) 2002 Andi Kleen - * - * This handles calls from both 32bit and 64bit mode. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#ifdef CONFIG_SMP -static void flush_ldt(void *null) -{ - if (current->active_mm) - load_LDT(¤t->active_mm->context); -} -#endif - -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) -{ - void *oldldt, *newldt; - int oldsize; - - if (mincount <= pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount + 511) & (~511); - if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount * LDT_ENTRY_SIZE); - else - newldt = kmalloc(mincount * LDT_ENTRY_SIZE, GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, - (mincount - oldsize) * LDT_ENTRY_SIZE); - wmb(); - pc->ldt = newldt; - wmb(); - pc->size = mincount; - wmb(); - - if (reload) { -#ifdef CONFIG_SMP - cpumask_t mask; - - preempt_disable(); - load_LDT(pc); - mask = cpumask_of_cpu(smp_processor_id()); - if (!cpus_equal(current->mm->cpu_vm_mask, mask)) - smp_call_function(flush_ldt, NULL, 1, 1); - preempt_enable(); -#else - load_LDT(pc); -#endif - } - if (oldsize) { - if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - kfree(oldldt); - } - return 0; -} - -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) -{ - int err = alloc_ldt(new, old->size, 0); - - if (err < 0) - return err; - memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE); - return 0; -} - -/* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. - */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - struct mm_struct *old_mm; - int retval = 0; - - mutex_init(&mm->context.lock); - mm->context.size = 0; - old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - mutex_lock(&old_mm->context.lock); - retval = copy_ldt(&mm->context, &old_mm->context); - mutex_unlock(&old_mm->context.lock); - } - return retval; -} - -/* - * Don't touch the LDT register - we're already in the next thread. 
- */ -void destroy_context(struct mm_struct *mm) -{ - if (mm->context.size) { - if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - kfree(mm->context.ldt); - mm->context.size = 0; - } -} - -static int read_ldt(void __user *ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - struct mm_struct *mm = current->mm; - - if (!mm->context.size) - return 0; - if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) - bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; - - mutex_lock(&mm->context.lock); - size = mm->context.size * LDT_ENTRY_SIZE; - if (size > bytecount) - size = bytecount; - - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - mutex_unlock(&mm->context.lock); - if (err < 0) - goto error_return; - if (size != bytecount) { - /* zero-fill the rest */ - if (clear_user(ptr + size, bytecount - size) != 0) { - err = -EFAULT; - goto error_return; - } - } - return bytecount; -error_return: - return err; -} - -static int read_default_ldt(void __user *ptr, unsigned long bytecount) -{ - /* Arbitrary number */ - /* x86-64 default LDT is all zeros */ - if (bytecount > 128) - bytecount = 128; - if (clear_user(ptr, bytecount)) - return -EFAULT; - return bytecount; -} - -static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) -{ - struct mm_struct *mm = current->mm; - __u32 entry_1, entry_2; - int error; - struct user_desc ldt_info; - - error = -EINVAL; - if (bytecount != sizeof(ldt_info)) - goto out; - error = -EFAULT; - if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) - goto out; - - error = -EINVAL; - if (ldt_info.entry_number >= LDT_ENTRIES) - goto out; - if (ldt_info.contents == 3) { - if (oldmode) - goto out; - if (ldt_info.seg_not_present == 0) - goto out; - } - - mutex_lock(&mm->context.lock); - if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(¤t->mm->context, - ldt_info.entry_number + 1, 1); - if (error < 0) - goto out_unlock; - } - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || LDT_empty(&ldt_info)) { - entry_1 = 0; - entry_2 = 0; - goto install; - } - } - - entry_1 = LDT_entry_a(&ldt_info); - entry_2 = LDT_entry_b(&ldt_info); - if (oldmode) - entry_2 &= ~(1 << 20); - - /* Install the new entry ... */ -install: - write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, - entry_2); - error = 0; - -out_unlock: - mutex_unlock(&mm->context.lock); -out: - return error; -} - -asmlinkage int sys_modify_ldt(int func, void __user *ptr, - unsigned long bytecount) -{ - int ret = -ENOSYS; - - switch (func) { - case 0: - ret = read_ldt(ptr, bytecount); - break; - case 1: - ret = write_ldt(ptr, bytecount, 1); - break; - case 2: - ret = read_default_ldt(ptr, bytecount); - break; - case 0x11: - ret = write_ldt(ptr, bytecount, 0); - break; - } - return ret; -} -- cgit v1.2.3 From 37e650c7c8a27de533d409b53c29f4135dcc7af6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:14 +0100 Subject: x86: rename get_maxlvt to lapic_get_maxlvt Use the same name for the 32 and 64 bit variant. 
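For reference, a sketch of what the renamed helper computes, reconstructed under the usual local APIC register layout (the changed call sites are in the diff below): bits 23:16 of the APIC version register hold the index of the highest local vector table (LVT) entry.

int lapic_get_maxlvt(void)
{
	unsigned int v = apic_read(APIC_LVR);	/* local APIC version register */

	/* GET_APIC_MAXLVT(): bits 23:16 give the highest LVT index */
	return (v >> 16) & 0xff;
}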
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 12 ++++++------ arch/x86/kernel/io_apic_64.c | 2 +- arch/x86/kernel/smpboot_64.c | 2 +- include/asm-x86/apic_64.h | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index fa6cdee6d303..dfeda91fa80c 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -160,7 +160,7 @@ void enable_NMI_through_LVT0 (void * dummy) apic_write(APIC_LVT0, v); } -int get_maxlvt(void) +int lapic_get_maxlvt(void) { unsigned int v, maxlvt; @@ -194,7 +194,7 @@ void clear_local_APIC(void) int maxlvt; unsigned int v; - maxlvt = get_maxlvt(); + maxlvt = lapic_get_maxlvt(); /* * Masking an LVT entry can trigger a local APIC error @@ -333,7 +333,7 @@ int __init verify_local_APIC(void) reg1 = GET_APIC_VERSION(reg0); if (reg1 == 0x00 || reg1 == 0xff) return 0; - reg1 = get_maxlvt(); + reg1 = lapic_get_maxlvt(); if (reg1 < 0x02 || reg1 == 0xff) return 0; @@ -519,7 +519,7 @@ void __cpuinit setup_local_APIC (void) { unsigned oldvalue; - maxlvt = get_maxlvt(); + maxlvt = lapic_get_maxlvt(); oldvalue = apic_read(APIC_ESR); value = ERROR_APIC_VECTOR; // enables sending errors apic_write(APIC_LVTERR, value); @@ -571,7 +571,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) if (!apic_pm_state.active) return 0; - maxlvt = get_maxlvt(); + maxlvt = lapic_get_maxlvt(); apic_pm_state.apic_id = apic_read(APIC_ID); apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); @@ -605,7 +605,7 @@ static int lapic_resume(struct sys_device *dev) if (!apic_pm_state.active) return 0; - maxlvt = get_maxlvt(); + maxlvt = lapic_get_maxlvt(); local_irq_save(flags); rdmsr(MSR_IA32_APICBASE, l, h); diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 23a3ac06a23e..d4f5286101a9 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1069,7 +1069,7 @@ void __apicdebuginit print_local_APIC(void * dummy) v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); - maxlvt = get_maxlvt(); + maxlvt = lapic_get_maxlvt(); v = apic_read(APIC_TASKPRI); printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index aaf4e1291217..8147b7d4db63 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -466,7 +466,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta */ Dprintk("#startup loops: %d.\n", num_starts); - maxlvt = get_maxlvt(); + maxlvt = lapic_get_maxlvt(); for (j = 1; j <= num_starts; j++) { Dprintk("Sending STARTUP #%d.\n",j); diff --git a/include/asm-x86/apic_64.h b/include/asm-x86/apic_64.h index 9a0ec02a49a1..b5f850f25114 100644 --- a/include/asm-x86/apic_64.h +++ b/include/asm-x86/apic_64.h @@ -64,7 +64,7 @@ static inline void ack_APIC_irq(void) apic_write(APIC_EOI, 0); } -extern int get_maxlvt(void); +extern int lapic_get_maxlvt(void); extern void clear_local_APIC(void); extern void connect_bsp_APIC(void); extern void disconnect_bsp_APIC(int virt_wire_setup); -- cgit v1.2.3 From 3c6bb07ac1b4174318606a26f0de8ceb9f6d8133 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:15 +0100 Subject: x86: use u32 for safe_apic_wait_icr_idle() Preparatory patch for merging apic headers.
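A sketch of the loop behind safe_apic_wait_icr_idle(), reconstructed for illustration rather than quoted from the tree: it polls the delivery-status (busy) bit of the ICR a bounded number of times and returns the final busy state, a 32-bit register field, which is why u32 is the natural return type on both 32-bit and 64-bit.

u32 safe_apic_wait_icr_idle(void)
{
	u32 send_status;
	int timeout = 0;

	do {
		/* poll the delivery-status (busy) bit in the ICR */
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
		if (!send_status)
			break;
		udelay(100);
	} while (timeout++ < 1000);

	return send_status;	/* 0 if idle, busy bit if we timed out */
}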
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 4 ++-- arch/x86/kernel/smpboot_64.c | 11 ++++++----- include/asm-x86/apic_64.h | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index dfeda91fa80c..3de3764a862c 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -135,9 +135,9 @@ void apic_wait_icr_idle(void) cpu_relax(); } -unsigned int safe_apic_wait_icr_idle(void) +u32 safe_apic_wait_icr_idle(void) { - unsigned int send_status; + u32 send_status; int timeout; timeout = 0; diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 8147b7d4db63..b36d32ff0b39 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -384,19 +384,20 @@ static void inquire_remote_apic(int apicid) unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; char *names[] = { "ID", "VERSION", "SPIV" }; int timeout; - unsigned int status; + u32 status; printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); for (i = 0; i < ARRAY_SIZE(regs); i++) { - printk("... APIC #%d %s: ", apicid, names[i]); + printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]); /* * Wait for idle. */ status = safe_apic_wait_icr_idle(); if (status) - printk("a previous APIC delivery may have failed\n"); + printk(KERN_CONT + "a previous APIC delivery may have failed\n"); apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); @@ -410,10 +411,10 @@ static void inquire_remote_apic(int apicid) switch (status) { case APIC_ICR_RR_VALID: status = apic_read(APIC_RRR); - printk("%08x\n", status); + printk(KERN_CONT "%08x\n", status); break; default: - printk("failed\n"); + printk(KERN_CONT "failed\n"); } } } diff --git a/include/asm-x86/apic_64.h b/include/asm-x86/apic_64.h index b5f850f25114..7bfad0224178 100644 --- a/include/asm-x86/apic_64.h +++ b/include/asm-x86/apic_64.h @@ -49,7 +49,7 @@ static __inline unsigned int apic_read(unsigned long reg) } extern void apic_wait_icr_idle(void); -extern unsigned int safe_apic_wait_icr_idle(void); +extern u32 safe_apic_wait_icr_idle(void); static inline void ack_APIC_irq(void) { -- cgit v1.2.3 From 42e0a9aa5d467188687c6b705412578e53c14af6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:15 +0100 Subject: x86: use u32 for some lapic functions Use u32 so 32 and 64bit have the same interface. 
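The underlying accessors are plain 32-bit MMIO loads and stores, so a fixed-width u32 fits both architectures while unsigned long changes width between them; a minimal sketch mirroring the native_apic_* helpers in the diff below:

static inline void native_apic_write(unsigned long reg, u32 v)
{
	*((volatile u32 *)(APIC_BASE + reg)) = v;	/* 32-bit MMIO store */
}

static inline u32 native_apic_read(unsigned long reg)
{
	return *((volatile u32 *)(APIC_BASE + reg));	/* 32-bit MMIO load */
}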
Andrew Morton: xen, lguest build fixes Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 ++-- arch/x86/lguest/boot.c | 4 ++-- arch/x86/xen/enlighten.c | 4 ++-- include/asm-x86/apic_32.h | 16 +++++++--------- include/asm-x86/apic_64.h | 6 +++--- include/asm-x86/paravirt.h | 12 ++++++------ 6 files changed, 22 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index a56c782653be..3a069acb270c 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -135,9 +135,9 @@ void apic_wait_icr_idle(void) cpu_relax(); } -unsigned long safe_apic_wait_icr_idle(void) +u32 safe_apic_wait_icr_idle(void) { - unsigned long send_status; + u32 send_status; int timeout; timeout = 0; diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 92c56117eae5..df04bf884dd4 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -788,11 +788,11 @@ static void lguest_wbinvd(void) * code qualifies for Advanced. It will also never interrupt anything. It * does, however, allow us to get through the Linux boot code. */ #ifdef CONFIG_X86_LOCAL_APIC -static void lguest_apic_write(unsigned long reg, unsigned long v) +static void lguest_apic_write(unsigned long reg, u32 v) { } -static unsigned long lguest_apic_read(unsigned long reg) +static u32 lguest_apic_read(unsigned long reg) { return 0; } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 79ad15252150..00829401389e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -521,12 +521,12 @@ static void xen_io_delay(void) } #ifdef CONFIG_X86_LOCAL_APIC -static unsigned long xen_apic_read(unsigned long reg) +static u32 xen_apic_read(unsigned long reg) { return 0; } -static void xen_apic_write(unsigned long reg, unsigned long val) +static void xen_apic_write(unsigned long reg, u32 val) { /* Warn to see if there's any stray references */ WARN_ON(1); diff --git a/include/asm-x86/apic_32.h b/include/asm-x86/apic_32.h index f909e2daf226..649e9a6f6683 100644 --- a/include/asm-x86/apic_32.h +++ b/include/asm-x86/apic_32.h @@ -51,25 +51,23 @@ extern int local_apic_timer_disabled; #define setup_secondary_clock setup_secondary_APIC_clock #endif -static __inline fastcall void native_apic_write(unsigned long reg, - unsigned long v) +static __inline fastcall void native_apic_write(unsigned long reg, u32 v) { - *((volatile unsigned long *)(APIC_BASE+reg)) = v; + *((volatile u32 *)(APIC_BASE + reg)) = v; } -static __inline fastcall void native_apic_write_atomic(unsigned long reg, - unsigned long v) +static __inline fastcall void native_apic_write_atomic(unsigned long reg, u32 v) { - xchg((volatile unsigned long *)(APIC_BASE+reg), v); + (void) xchg((u32 *)(APIC_BASE + reg), v); } -static __inline fastcall unsigned long native_apic_read(unsigned long reg) +static __inline fastcall u32 native_apic_read(unsigned long reg) { - return *((volatile unsigned long *)(APIC_BASE+reg)); + return *((volatile u32 *)(APIC_BASE + reg)); } extern void apic_wait_icr_idle(void); -extern unsigned long safe_apic_wait_icr_idle(void); +extern u32 safe_apic_wait_icr_idle(void); extern int get_physical_broadcast(void); #ifdef CONFIG_X86_GOOD_APIC diff --git a/include/asm-x86/apic_64.h b/include/asm-x86/apic_64.h index 7bfad0224178..9d0c06c4df91 100644 --- a/include/asm-x86/apic_64.h +++ b/include/asm-x86/apic_64.h @@ -38,14 +38,14 @@ struct pt_regs; * Basic functions accessing APICs. 
*/ -static __inline void apic_write(unsigned long reg, unsigned int v) +static __inline void apic_write(unsigned long reg, u32 v) { *((volatile unsigned int *)(APIC_BASE+reg)) = v; } -static __inline unsigned int apic_read(unsigned long reg) +static __inline u32 apic_read(unsigned long reg) { - return *((volatile unsigned int *)(APIC_BASE+reg)); + return *((volatile u32 *)(APIC_BASE+reg)); } extern void apic_wait_icr_idle(void); diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index f59d370c5df4..19fd3e67b08c 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -150,9 +150,9 @@ struct pv_apic_ops { * Direct APIC operations, principally for VMI. Ideally * these shouldn't be in this interface. */ - void (*apic_write)(unsigned long reg, unsigned long v); - void (*apic_write_atomic)(unsigned long reg, unsigned long v); - unsigned long (*apic_read)(unsigned long reg); + void (*apic_write)(unsigned long reg, u32 v); + void (*apic_write_atomic)(unsigned long reg, u32 v); + u32 (*apic_read)(unsigned long reg); void (*setup_boot_clock)(void); void (*setup_secondary_clock)(void); @@ -690,17 +690,17 @@ static inline void slow_down_io(void) { /* * Basic functions accessing APICs. */ -static inline void apic_write(unsigned long reg, unsigned long v) +static inline void apic_write(unsigned long reg, u32 v) { PVOP_VCALL2(pv_apic_ops.apic_write, reg, v); } -static inline void apic_write_atomic(unsigned long reg, unsigned long v) +static inline void apic_write_atomic(unsigned long reg, u32 v) { PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v); } -static inline unsigned long apic_read(unsigned long reg) +static inline u32 apic_read(unsigned long reg) { return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg); } -- cgit v1.2.3 From 376ff0352c24a5fa47f1250dd60937b5a9077672 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Jan 2008 13:30:16 +0100 Subject: x86: move acpi and pci declarations Move acpi/pci related declarations to the correct headers and remove the duplicate. Build fix from: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_32.c | 3 +-- arch/x86/kernel/setup_64.c | 2 -- include/asm-x86/acpi_32.h | 2 -- include/asm-x86/pci.h | 4 +++- include/asm-x86/proto.h | 3 --- 5 files changed, 4 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 51bdc0b1b72e..236d30b264d8 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -44,6 +44,7 @@ #include #include #include +#include #include