Diffstat (limited to 'arch/i386')
37 files changed, 754 insertions, 281 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 8dfa3054f10f..1596101cfaf8 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -173,6 +173,12 @@ config ACPI_SRAT bool default y depends on NUMA && (X86_SUMMIT || X86_GENERICARCH) + select ACPI_NUMA + +config HAVE_ARCH_PARSE_SRAT + bool + default y + depends on ACPI_SRAT config X86_SUMMIT_NUMA bool @@ -224,7 +230,6 @@ config NR_CPUS config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" depends on SMP - default off help SMT scheduler support improves the CPU scheduler's decision making when dealing with Intel Pentium 4 chips with HyperThreading at a diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 5ccbf58ec94f..97ca17189af5 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -202,6 +202,8 @@ int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size) if (mcfg->config[i].base_reserved) { printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n"); + kfree(pci_mmcfg_config); + pci_mmcfg_config_num = 0; return -ENODEV; } } diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c index 1cb2b186a3af..4ee83577bf61 100644 --- a/arch/i386/kernel/acpi/sleep.c +++ b/arch/i386/kernel/acpi/sleep.c @@ -8,30 +8,17 @@ #include <linux/acpi.h> #include <linux/bootmem.h> #include <linux/dmi.h> +#include <linux/cpumask.h> + #include <asm/smp.h> -#include <asm/tlbflush.h> /* address in low memory of the wakeup routine. */ unsigned long acpi_wakeup_address = 0; unsigned long acpi_video_flags; extern char wakeup_start, wakeup_end; -extern void zap_low_mappings(void); - extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); -static void init_low_mapping(pgd_t * pgd, int pgd_limit) -{ - int pgd_ofs = 0; - - while ((pgd_ofs < pgd_limit) - && (pgd_ofs + USER_PTRS_PER_PGD < PTRS_PER_PGD)) { - set_pgd(pgd, *(pgd + USER_PTRS_PER_PGD)); - pgd_ofs++, pgd++; - } - flush_tlb_all(); -} - /** * acpi_save_state_mem - save kernel state * @@ -42,7 +29,6 @@ int acpi_save_state_mem(void) { if (!acpi_wakeup_address) return 1; - init_low_mapping(swapper_pg_dir, USER_PTRS_PER_PGD); memcpy((void *)acpi_wakeup_address, &wakeup_start, &wakeup_end - &wakeup_start); acpi_copy_wakeup_routine(acpi_wakeup_address); @@ -55,7 +41,6 @@ int acpi_save_state_mem(void) */ void acpi_restore_state_mem(void) { - zap_low_mappings(); } /** diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S index 7c74fe0dc93c..9f408eee4e6f 100644 --- a/arch/i386/kernel/acpi/wakeup.S +++ b/arch/i386/kernel/acpi/wakeup.S @@ -56,7 +56,7 @@ wakeup_code: 1: # set up page table - movl $swapper_pg_dir-__PAGE_OFFSET, %eax + movl $swsusp_pg_dir-__PAGE_OFFSET, %eax movl %eax, %cr3 testl $1, real_efer_save_restore - wakeup_code @@ -265,11 +265,6 @@ ENTRY(acpi_copy_wakeup_routine) movl $0x12345678, saved_magic ret -.data -ALIGN -ENTRY(saved_magic) .long 0 -ENTRY(saved_eip) .long 0 - save_registers: leal 4(%esp), %eax movl %eax, saved_context_esp @@ -304,7 +299,11 @@ ret_point: call restore_processor_state ret +.data ALIGN +ENTRY(saved_magic) .long 0 +ENTRY(saved_eip) .long 0 + # saved registers saved_gdt: .long 0,0 saved_idt: .long 0,0 diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 3d4b2f3d116a..5ab59c12335b 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -62,7 +62,7 @@ int apic_verbosity; static void apic_pm_activate(void); -int modern_apic(void) +static int modern_apic(void) { unsigned int lvr, version; /* AMD systems use old APIC 
versions, so check the CPU */ @@ -113,7 +113,7 @@ void __init apic_intr_init(void) } /* Using APIC to generate smp_local_timer_interrupt? */ -int using_apic_timer = 0; +int using_apic_timer __read_mostly = 0; static int enabled_via_apicbase; diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index df0e1745f189..9e819eb68229 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -374,14 +374,14 @@ static struct { unsigned short segment; } apm_bios_entry; static int clock_slowed; -static int idle_threshold = DEFAULT_IDLE_THRESHOLD; -static int idle_period = DEFAULT_IDLE_PERIOD; +static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD; +static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD; static int set_pm_idle; static int suspends_pending; static int standbys_pending; static int ignore_sys_suspend; static int ignore_normal_resume; -static int bounce_interval = DEFAULT_BOUNCE_INTERVAL; +static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL; #ifdef CONFIG_APM_RTC_IS_GMT # define clock_cmos_diff 0 @@ -390,8 +390,8 @@ static int bounce_interval = DEFAULT_BOUNCE_INTERVAL; static long clock_cmos_diff; static int got_clock_diff; #endif -static int debug; -static int smp; +static int debug __read_mostly; +static int smp __read_mostly; static int apm_disabled = -1; #ifdef CONFIG_SMP static int power_off; @@ -403,8 +403,8 @@ static int realmode_power_off = 1; #else static int realmode_power_off; #endif -static int exit_kapmd; -static int kapmd_running; +static int exit_kapmd __read_mostly; +static int kapmd_running __read_mostly; #ifdef CONFIG_APM_ALLOW_INTS static int allow_ints = 1; #else @@ -416,15 +416,15 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); static struct apm_user * user_list; static DEFINE_SPINLOCK(user_list_lock); -static struct desc_struct bad_bios_desc = { 0, 0x00409200 }; +static const struct desc_struct bad_bios_desc = { 0, 0x00409200 }; -static char driver_version[] = "1.16ac"; /* no spaces */ +static const char driver_version[] = "1.16ac"; /* no spaces */ /* * APM event names taken from the APM 1.2 specification. These are * the message codes that the BIOS uses to tell us about events */ -static char * apm_event_name[] = { +static const char * const apm_event_name[] = { "system standby", "system suspend", "normal resume", @@ -616,7 +616,7 @@ static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in, * @ecx_in: ECX register value for BIOS call * @eax: EAX register on return from the BIOS call * - * Make a BIOS call that does only returns one value, or just status. + * Make a BIOS call that returns one value only, or just status. * If there is an error, then the error code is returned in AH * (bits 8-15 of eax) and this function returns non-zero. This is * used for simpler BIOS operations. 
This call may hold interrupts @@ -822,7 +822,7 @@ static void apm_do_busy(void) #define IDLE_CALC_LIMIT (HZ * 100) #define IDLE_LEAKY_MAX 16 -static void (*original_pm_idle)(void); +static void (*original_pm_idle)(void) __read_mostly; /** * apm_cpu_idle - cpu idling for APM capable Linux @@ -1063,7 +1063,8 @@ static int apm_engage_power_management(u_short device, int enable) static int apm_console_blank(int blank) { - int error, i; + int error = APM_NOT_ENGAGED; /* silence gcc */ + int i; u_short state; static const u_short dev[3] = { 0x100, 0x1FF, 0x101 }; @@ -1104,7 +1105,8 @@ static int queue_empty(struct apm_user *as) static apm_event_t get_queued_event(struct apm_user *as) { - as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS; + if (++as->event_tail >= APM_MAX_EVENTS) + as->event_tail = 0; return as->events[as->event_tail]; } @@ -1118,13 +1120,16 @@ static void queue_event(apm_event_t event, struct apm_user *sender) for (as = user_list; as != NULL; as = as->next) { if ((as == sender) || (!as->reader)) continue; - as->event_head = (as->event_head + 1) % APM_MAX_EVENTS; + if (++as->event_head >= APM_MAX_EVENTS) + as->event_head = 0; + if (as->event_head == as->event_tail) { static int notified; if (notified++ == 0) printk(KERN_ERR "apm: an event queue overflowed\n"); - as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS; + if (++as->event_tail >= APM_MAX_EVENTS) + as->event_tail = 0; } as->events[as->event_head] = event; if ((!as->suser) || (!as->writer)) @@ -1282,7 +1287,7 @@ static void standby(void) static apm_event_t get_event(void) { int error; - apm_event_t event; + apm_event_t event = APM_NO_EVENTS; /* silence gcc */ apm_eventinfo_t info; static int notified; diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index a06a49075f10..44f2c5f2dda1 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -11,6 +11,8 @@ #include <asm/msr.h> #include <asm/io.h> #include <asm/mmu_context.h> +#include <asm/mtrr.h> +#include <asm/mce.h> #ifdef CONFIG_X86_LOCAL_APIC #include <asm/mpspec.h> #include <asm/apic.h> diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 11da3ca237e3..05668e3598c0 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -174,7 +174,6 @@ acpi_processor_set_performance ( udelay(10); } } else { - i = 0; value = (u32) perf->states[state].status; } @@ -549,14 +548,15 @@ static struct freq_attr* acpi_cpufreq_attr[] = { }; static struct cpufreq_driver acpi_cpufreq_driver = { - .verify = acpi_cpufreq_verify, - .target = acpi_cpufreq_target, - .init = acpi_cpufreq_cpu_init, - .exit = acpi_cpufreq_cpu_exit, - .resume = acpi_cpufreq_resume, - .name = "acpi-cpufreq", - .owner = THIS_MODULE, - .attr = acpi_cpufreq_attr, + .verify = acpi_cpufreq_verify, + .target = acpi_cpufreq_target, + .init = acpi_cpufreq_cpu_init, + .exit = acpi_cpufreq_cpu_exit, + .resume = acpi_cpufreq_resume, + .name = "acpi-cpufreq", + .owner = THIS_MODULE, + .attr = acpi_cpufreq_attr, + .flags = CPUFREQ_STICKY, }; diff --git a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c index f275e0d4aee5..0d49d73d1b71 100644 --- a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -1,5 +1,5 @@ /* - * (C) 2004 Sebastian Witt <se.witt@gmx.net> + * (C) 2004-2006 Sebastian Witt <se.witt@gmx.net> * * Licensed under the terms of the GNU GPL License version 2. 
* Based upon reverse engineered information @@ -90,7 +90,7 @@ static int nforce2_calc_pll(unsigned int fsb) /* Try to calculate multiplier and divider up to 4 times */ while (((mul == 0) || (div == 0)) && (tried <= 3)) { - for (xdiv = 1; xdiv <= 0x80; xdiv++) + for (xdiv = 2; xdiv <= 0x80; xdiv++) for (xmul = 1; xmul <= 0xfe; xmul++) if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) == fsb + tried) { @@ -117,8 +117,7 @@ static void nforce2_write_pll(int pll) int temp; /* Set the pll addr. to 0x00 */ - temp = 0x00; - pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, temp); + pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, 0); /* Now write the value in all 64 registers */ for (temp = 0; temp <= 0x3f; temp++) @@ -266,7 +265,7 @@ static int nforce2_target(struct cpufreq_policy *policy, if (freqs.old == freqs.new) return 0; - dprintk(KERN_INFO "cpufreq: Old CPU frequency %d kHz, new %d kHz\n", + dprintk("Old CPU frequency %d kHz, new %d kHz\n", freqs.old, freqs.new); cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); @@ -278,7 +277,7 @@ static int nforce2_target(struct cpufreq_policy *policy, printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n", target_fsb); else - dprintk(KERN_INFO "cpufreq: Changed FSB successfully to %d\n", + dprintk("Changed FSB successfully to %d\n", target_fsb); /* Enable IRQs */ diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index 8ef38544453c..146f607e9c44 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -77,13 +77,17 @@ static char speedbuffer[8]; static char *print_speed(int speed) { - if (speed > 1000) { - if (speed%1000 == 0) - sprintf (speedbuffer, "%dGHz", speed/1000); - else - sprintf (speedbuffer, "%d.%dGHz", speed/1000, (speed%1000)/100); - } else - sprintf (speedbuffer, "%dMHz", speed); + if (speed < 1000) { + snprintf(speedbuffer, sizeof(speedbuffer),"%dMHz", speed); + return speedbuffer; + } + + if (speed%1000 == 0) + snprintf(speedbuffer, sizeof(speedbuffer), + "%dGHz", speed/1000); + else + snprintf(speedbuffer, sizeof(speedbuffer), + "%d.%dGHz", speed/1000, (speed%1000)/100); return speedbuffer; } @@ -675,7 +679,7 @@ static int __init longhaul_init(void) static void __exit longhaul_exit(void) { - int i=0; + int i; for (i=0; i < numscales; i++) { if (clock_ratio[i] == maxmult) { diff --git a/arch/i386/kernel/cpu/cpufreq/longrun.c b/arch/i386/kernel/cpu/cpufreq/longrun.c index e3868de4dc2e..b2689514295a 100644 --- a/arch/i386/kernel/cpu/cpufreq/longrun.c +++ b/arch/i386/kernel/cpu/cpufreq/longrun.c @@ -223,7 +223,6 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, /* set to 0 to try_hi perf_pctg */ msr_lo &= 0xFFFFFF80; msr_hi &= 0xFFFFFF80; - msr_lo |= 0; msr_hi |= try_hi; wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c index 2bf4237cb94e..694d4793bf6a 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c @@ -452,23 +452,23 @@ static int powernow_decode_bios (int maxfid, int startvid) pst = (struct pst_s *) p; - for (i = 0 ; i <psb->numpst; i++) { + for (j=0; j<psb->numpst; j++) { pst = (struct pst_s *) p; number_scales = pst->numpstates; if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) && (maxfid==pst->maxfid) && (startvid==pst->startvid)) { - dprintk ("PST:%d (@%p)\n", i, pst); + dprintk ("PST:%d (@%p)\n", j, pst); dprintk (" cpuid: 0x%x fsb: %d 
maxFID: 0x%x startvid: 0x%x\n", pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); ret = get_ranges ((char *) pst + sizeof (struct pst_s)); return ret; - } else { + unsigned int k; p = (char *) pst + sizeof (struct pst_s); - for (j=0 ; j < number_scales; j++) + for (k=0; k<number_scales; k++) p+=2; } } @@ -581,10 +581,7 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy) rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); - /* recalibrate cpu_khz */ - result = recalibrate_cpu_khz(); - if (result) - return result; + recalibrate_cpu_khz(); fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID]; if (!fsb) { diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index 71fffa174425..2d6491672559 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -1,5 +1,5 @@ /* - * (c) 2003, 2004, 2005 Advanced Micro Devices, Inc. + * (c) 2003-2006 Advanced Micro Devices, Inc. * Your use of this code is subject to the terms and conditions of the * GNU general public license version 2. See "COPYING" or * http://www.gnu.org/licenses/gpl.html @@ -14,13 +14,13 @@ * Based upon datasheets & sample CPUs kindly provided by AMD. * * Valuable input gratefully received from Dave Jones, Pavel Machek, - * Dominik Brodowski, and others. + * Dominik Brodowski, Jacob Shin, and others. * Originally developed by Paul Devriendt. * Processor information obtained from Chapter 9 (Power and Thermal Management) * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD * Opteron Processors" available for download from www.amd.com * - * Tables for specific CPUs can be infrerred from + * Tables for specific CPUs can be inferred from * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf */ @@ -46,7 +46,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 1.60.2" +#define VERSION "version 2.00.00" #include "powernow-k8.h" /* serialize freq changes */ @@ -54,6 +54,8 @@ static DEFINE_MUTEX(fidvid_mutex); static struct powernow_k8_data *powernow_data[NR_CPUS]; +static int cpu_family = CPU_OPTERON; + #ifndef CONFIG_SMP static cpumask_t cpu_core_map[1]; #endif @@ -64,16 +66,36 @@ static u32 find_freq_from_fid(u32 fid) return 800 + (fid * 100); } + /* Return a frequency in KHz, given an input fid */ static u32 find_khz_freq_from_fid(u32 fid) { return 1000 * find_freq_from_fid(fid); } -/* Return a voltage in miliVolts, given an input vid */ -static u32 find_millivolts_from_vid(struct powernow_k8_data *data, u32 vid) +/* Return a frequency in MHz, given an input fid and did */ +static u32 find_freq_from_fiddid(u32 fid, u32 did) +{ + return 100 * (fid + 0x10) >> did; +} + +static u32 find_khz_freq_from_fiddid(u32 fid, u32 did) { - return 1550-vid*25; + return 1000 * find_freq_from_fiddid(fid, did); +} + +static u32 find_fid_from_pstate(u32 pstate) +{ + u32 hi, lo; + rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); + return lo & HW_PSTATE_FID_MASK; +} + +static u32 find_did_from_pstate(u32 pstate) +{ + u32 hi, lo; + rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); + return (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; } /* Return the vco fid for an input fid @@ -98,6 +120,9 @@ static int pending_bit_stuck(void) { u32 lo, hi; + if (cpu_family == CPU_HW_PSTATE) + return 0; + rdmsr(MSR_FIDVID_STATUS, lo, hi); return lo & MSR_S_LO_CHANGE_PENDING ? 
1 : 0; } @@ -111,6 +136,14 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) u32 lo, hi; u32 i = 0; + if (cpu_family == CPU_HW_PSTATE) { + rdmsr(MSR_PSTATE_STATUS, lo, hi); + i = lo & HW_PSTATE_MASK; + rdmsr(MSR_PSTATE_DEF_BASE + i, lo, hi); + data->currfid = lo & HW_PSTATE_FID_MASK; + data->currdid = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; + return 0; + } do { if (i++ > 10000) { dprintk("detected change pending stuck\n"); @@ -175,7 +208,7 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) do { wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); if (i++ > 100) { - printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); + printk(KERN_ERR PFX "Hardware error - pending bit very stuck - no further pstate changes possible\n"); return 1; } } while (query_current_values_with_pending_wait(data)); @@ -255,7 +288,15 @@ static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, return 0; } -/* Change the fid and vid, by the 3 phases. */ +/* Change hardware pstate by single MSR write */ +static int transition_pstate(struct powernow_k8_data *data, u32 pstate) +{ + wrmsr(MSR_PSTATE_CTRL, pstate, 0); + data->currfid = find_fid_from_pstate(pstate); + return 0; +} + +/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */ static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid) { if (core_voltage_pre_transition(data, reqvid)) @@ -474,26 +515,35 @@ static int check_supported_cpu(unsigned int cpu) goto out; eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); - if ((eax & CPUID_XFAM) != CPUID_XFAM_K8) + if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) && + ((eax & CPUID_XFAM) < CPUID_XFAM_10H)) goto out; - if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || - ((eax & CPUID_XMOD) > CPUID_XMOD_REV_G)) { - printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); - goto out; - } + if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { + if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || + ((eax & CPUID_XMOD) > CPUID_XMOD_REV_G)) { + printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); + goto out; + } - eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); - if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { - printk(KERN_INFO PFX - "No frequency change capabilities detected\n"); - goto out; - } + eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); + if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { + printk(KERN_INFO PFX + "No frequency change capabilities detected\n"); + goto out; + } - cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); - if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) { - printk(KERN_INFO PFX "Power state transitions not supported\n"); - goto out; + cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); + if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) { + printk(KERN_INFO PFX "Power state transitions not supported\n"); + goto out; + } + } else { /* must be a HW Pstate capable processor */ + cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); + if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) + cpu_family = CPU_HW_PSTATE; + else + goto out; } rc = 1; @@ -547,12 +597,18 @@ static void print_basics(struct powernow_k8_data *data) { int j; for (j = 0; j < data->numps; j++) { - if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) - printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x (%d mV)\n", j, + if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) { + if 
(cpu_family == CPU_HW_PSTATE) { + printk(KERN_INFO PFX " %d : fid 0x%x gid 0x%x (%d MHz)\n", j, (data->powernow_table[j].index & 0xff00) >> 8, + (data->powernow_table[j].index & 0xff0000) >> 16, + data->powernow_table[j].frequency/1000); + } else { + printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n", j, data->powernow_table[j].index & 0xff, data->powernow_table[j].frequency/1000, - data->powernow_table[j].index >> 8, - find_millivolts_from_vid(data, data->powernow_table[j].index >> 8)); + data->powernow_table[j].index >> 8); + } + } } if (data->batps) printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps); @@ -702,7 +758,7 @@ static int find_psb_table(struct powernow_k8_data *data) #ifdef CONFIG_X86_POWERNOW_K8_ACPI static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { - if (!data->acpi_data.state_count) + if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) return; data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; @@ -715,9 +771,8 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { - int i; - int cntlofreq = 0; struct cpufreq_frequency_table *powernow_table; + int ret_val; if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { dprintk("register performance failed: bad ACPI data\n"); @@ -746,13 +801,92 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) goto err_out; } + if (cpu_family == CPU_HW_PSTATE) + ret_val = fill_powernow_table_pstate(data, powernow_table); + else + ret_val = fill_powernow_table_fidvid(data, powernow_table); + if (ret_val) + goto err_out_mem; + + powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END; + powernow_table[data->acpi_data.state_count].index = 0; + data->powernow_table = powernow_table; + + /* fill in data */ + data->numps = data->acpi_data.state_count; + print_basics(data); + powernow_k8_acpi_pst_values(data, 0); + + /* notify BIOS that we exist */ + acpi_processor_notify_smm(THIS_MODULE); + + return 0; + +err_out_mem: + kfree(powernow_table); + +err_out: + acpi_processor_unregister_performance(&data->acpi_data, data->cpu); + + /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ + data->acpi_data.state_count = 0; + + return -ENODEV; +} + +static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) +{ + int i; + + for (i = 0; i < data->acpi_data.state_count; i++) { + u32 index; + u32 hi = 0, lo = 0; + u32 fid; + u32 did; + + index = data->acpi_data.states[i].control & HW_PSTATE_MASK; + if (index > MAX_HW_PSTATE) { + printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); + printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); + } + rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); + if (!(hi & HW_PSTATE_VALID_MASK)) { + dprintk("invalid pstate %d, ignoring\n", index); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } + + fid = lo & HW_PSTATE_FID_MASK; + did = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; + + dprintk(" %d : fid 0x%x, did 0x%x\n", index, fid, did); + + powernow_table[i].index = index | (fid << HW_FID_INDEX_SHIFT) | (did << HW_DID_INDEX_SHIFT); + + powernow_table[i].frequency = find_khz_freq_from_fiddid(fid, did); + + if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { + printk(KERN_INFO PFX "invalid freq entries %u kHz vs. 
%u kHz\n", + powernow_table[i].frequency, + (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } + } + return 0; +} + +static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) +{ + int i; + int cntlofreq = 0; for (i = 0; i < data->acpi_data.state_count; i++) { u32 fid; u32 vid; if (data->exttype) { - fid = data->acpi_data.states[i].status & FID_MASK; - vid = (data->acpi_data.states[i].status >> VID_SHIFT) & VID_MASK; + fid = data->acpi_data.states[i].status & EXT_FID_MASK; + vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK; } else { fid = data->acpi_data.states[i].control & FID_MASK; vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; @@ -786,7 +920,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || (powernow_table[i].index != powernow_table[cntlofreq].index)) { printk(KERN_ERR PFX "Too many lo freq table entries\n"); - goto err_out_mem; + return 1; } dprintk("double low frequency table entry, ignoring it.\n"); @@ -804,31 +938,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) continue; } } - - powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END; - powernow_table[data->acpi_data.state_count].index = 0; - data->powernow_table = powernow_table; - - /* fill in data */ - data->numps = data->acpi_data.state_count; - print_basics(data); - powernow_k8_acpi_pst_values(data, 0); - - /* notify BIOS that we exist */ - acpi_processor_notify_smm(THIS_MODULE); - return 0; - -err_out_mem: - kfree(powernow_table); - -err_out: - acpi_processor_unregister_performance(&data->acpi_data, data->cpu); - - /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ - data->acpi_data.state_count = 0; - - return -ENODEV; } static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) @@ -844,20 +954,20 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned #endif /* CONFIG_X86_POWERNOW_K8_ACPI */ /* Take a frequency, and issue the fid/vid transition command */ -static int transition_frequency(struct powernow_k8_data *data, unsigned int index) +static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index) { - u32 fid; - u32 vid; + u32 fid = 0; + u32 vid = 0; int res, i; struct cpufreq_freqs freqs; dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); + /* fid/vid correctness check for k8 */ /* fid are the lower 8 bits of the index we stored into - * the cpufreq frequency table in find_psb_table, vid are - * the upper 8 bits. + * the cpufreq frequency table in find_psb_table, vid + * are the upper 8 bits. 
*/ - fid = data->powernow_table[index].index & 0xFF; vid = (data->powernow_table[index].index & 0xFF00) >> 8; @@ -881,22 +991,58 @@ static int transition_frequency(struct powernow_k8_data *data, unsigned int inde dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", smp_processor_id(), fid, vid); - - freqs.cpu = data->cpu; freqs.old = find_khz_freq_from_fid(data->currfid); freqs.new = find_khz_freq_from_fid(fid); - for_each_cpu_mask(i, cpu_core_map[data->cpu]) { + + for_each_cpu_mask(i, *(data->available_cores)) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); } res = transition_fid_vid(data, fid, vid); - freqs.new = find_khz_freq_from_fid(data->currfid); - for_each_cpu_mask(i, cpu_core_map[data->cpu]) { + + for_each_cpu_mask(i, *(data->available_cores)) { freqs.cpu = i; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } + } + return res; +} + +/* Take a frequency, and issue the hardware pstate transition command */ +static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index) +{ + u32 fid = 0; + u32 did = 0; + u32 pstate = 0; + int res, i; + struct cpufreq_freqs freqs; + + dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); + + /* get fid did for hardware pstate transition */ + pstate = index & HW_PSTATE_MASK; + if (pstate > MAX_HW_PSTATE) + return 0; + fid = (index & HW_FID_INDEX_MASK) >> HW_FID_INDEX_SHIFT; + did = (index & HW_DID_INDEX_MASK) >> HW_DID_INDEX_SHIFT; + freqs.old = find_khz_freq_from_fiddid(data->currfid, data->currdid); + freqs.new = find_khz_freq_from_fiddid(fid, did); + + for_each_cpu_mask(i, *(data->available_cores)) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } + + res = transition_pstate(data, pstate); + data->currfid = find_fid_from_pstate(pstate); + data->currdid = find_did_from_pstate(pstate); + freqs.new = find_khz_freq_from_fiddid(data->currfid, data->currdid); + + for_each_cpu_mask(i, *(data->available_cores)) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } return res; } @@ -933,18 +1079,21 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", pol->cpu, targfreq, pol->min, pol->max, relation); - if (query_current_values_with_pending_wait(data)) { - ret = -EIO; + if (query_current_values_with_pending_wait(data)) goto err_out; - } - dprintk("targ: curr fid 0x%x, vid 0x%x\n", + if (cpu_family == CPU_HW_PSTATE) + dprintk("targ: curr fid 0x%x, did 0x%x\n", + data->currfid, data->currvid); + else { + dprintk("targ: curr fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); - if ((checkvid != data->currvid) || (checkfid != data->currfid)) { - printk(KERN_INFO PFX - "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n", - checkfid, data->currfid, checkvid, data->currvid); + if ((checkvid != data->currvid) || (checkfid != data->currfid)) { + printk(KERN_INFO PFX + "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n", + checkfid, data->currfid, checkvid, data->currvid); + } } if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate)) @@ -954,7 +1103,11 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi powernow_k8_acpi_pst_values(data, newstate); - if (transition_frequency(data, newstate)) { + if (cpu_family == CPU_HW_PSTATE) + ret = transition_frequency_pstate(data, newstate); + else + ret = transition_frequency_fidvid(data, newstate); + if (ret) { printk(KERN_ERR PFX 
"transition frequency failed\n"); ret = 1; mutex_unlock(&fidvid_mutex); @@ -962,7 +1115,10 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi } mutex_unlock(&fidvid_mutex); - pol->cur = find_khz_freq_from_fid(data->currfid); + if (cpu_family == CPU_HW_PSTATE) + pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); + else + pol->cur = find_khz_freq_from_fid(data->currfid); ret = 0; err_out: @@ -1007,14 +1163,13 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) * Use the PSB BIOS structure. This is only availabe on * an UP version, and is deprecated by AMD. */ - - if ((num_online_cpus() != 1) || (num_possible_cpus() != 1)) { + if (num_online_cpus() != 1) { printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n"); kfree(data); return -ENODEV; } if (pol->cpu != 0) { - printk(KERN_ERR PFX "init not cpu 0\n"); + printk(KERN_ERR PFX "No _PSS objects for CPU other than CPU0\n"); kfree(data); return -ENODEV; } @@ -1042,20 +1197,28 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) if (query_current_values_with_pending_wait(data)) goto err_out; - fidvid_msr_init(); + if (cpu_family == CPU_OPTERON) + fidvid_msr_init(); /* run on any CPU again */ set_cpus_allowed(current, oldmask); pol->governor = CPUFREQ_DEFAULT_GOVERNOR; - pol->cpus = cpu_core_map[pol->cpu]; + if (cpu_family == CPU_HW_PSTATE) + pol->cpus = cpumask_of_cpu(pol->cpu); + else + pol->cpus = cpu_core_map[pol->cpu]; + data->available_cores = &(pol->cpus); /* Take a crude guess here. * That guess was in microseconds, so multiply with 1000 */ pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US) + (3 * (1 << data->irt) * 10)) * 1000; - pol->cur = find_khz_freq_from_fid(data->currfid); + if (cpu_family == CPU_HW_PSTATE) + pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); + else + pol->cur = find_khz_freq_from_fid(data->currfid); dprintk("policy current frequency %d kHz\n", pol->cur); /* min/max the cpu is capable of */ @@ -1069,8 +1232,12 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); - printk("cpu_init done, current fid 0x%x, vid 0x%x\n", - data->currfid, data->currvid); + if (cpu_family == CPU_HW_PSTATE) + dprintk("cpu_init done, current fid 0x%x, did 0x%x\n", + data->currfid, data->currdid); + else + dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", + data->currfid, data->currvid); powernow_data[pol->cpu] = data; @@ -1156,8 +1323,9 @@ static int __cpuinit powernowk8_init(void) } if (supported_cpus == num_online_cpus()) { - printk(KERN_INFO PFX "Found %d AMD Athlon 64 / Opteron " - "processors (" VERSION ")\n", supported_cpus); + printk(KERN_INFO PFX "Found %d %s " + "processors (" VERSION ")\n", supported_cpus, + boot_cpu_data.x86_model_id); return cpufreq_register_driver(&cpufreq_amd64_driver); } diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h index 79a7c5c87edc..0fb2a3001ba5 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h @@ -1,5 +1,5 @@ /* - * (c) 2003, 2004, 2005 Advanced Micro Devices, Inc. + * (c) 2003-2006 Advanced Micro Devices, Inc. * Your use of this code is subject to the terms and conditions of the * GNU general public license version 2. 
See "COPYING" or * http://www.gnu.org/licenses/gpl.html @@ -21,8 +21,8 @@ struct powernow_k8_data { u32 plllock; /* pll lock time, units 1 us */ u32 exttype; /* extended interface = 1 */ - /* keep track of the current fid / vid */ - u32 currvid, currfid; + /* keep track of the current fid / vid or did */ + u32 currvid, currfid, currdid; /* the powernow_table includes all frequency and vid/fid pairings: * fid are the lower 8 bits of the index, vid are the upper 8 bits. @@ -34,6 +34,10 @@ struct powernow_k8_data { * used to determine valid frequency/vid/fid states */ struct acpi_processor_performance acpi_data; #endif + /* we need to keep track of associated cores, but let cpufreq + * handle hotplug events - so just point at cpufreq pol->cpus + * structure */ + cpumask_t *available_cores; }; @@ -43,6 +47,7 @@ struct powernow_k8_data { #define CPUID_XFAM_K8 0 #define CPUID_XMOD 0x000f0000 /* extended model */ #define CPUID_XMOD_REV_G 0x00060000 +#define CPUID_XFAM_10H 0x00100000 /* family 0x10 */ #define CPUID_USE_XFAM_XMOD 0x00000f00 #define CPUID_GET_MAX_CAPABILITIES 0x80000000 #define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 @@ -79,6 +84,32 @@ struct powernow_k8_data { #define MSR_S_HI_CURRENT_VID 0x0000003f #define MSR_C_HI_STP_GNT_BENIGN 0x00000001 + +/* Hardware Pstate _PSS and MSR definitions */ +#define USE_HW_PSTATE 0x00000080 +#define HW_PSTATE_FID_MASK 0x0000003f +#define HW_PSTATE_DID_MASK 0x000001c0 +#define HW_PSTATE_DID_SHIFT 6 +#define HW_PSTATE_MASK 0x00000007 +#define HW_PSTATE_VALID_MASK 0x80000000 +#define HW_FID_INDEX_SHIFT 8 +#define HW_FID_INDEX_MASK 0x0000ff00 +#define HW_DID_INDEX_SHIFT 16 +#define HW_DID_INDEX_MASK 0x00ff0000 +#define HW_WATTS_MASK 0xff +#define HW_PWR_DVR_MASK 0x300 +#define HW_PWR_DVR_SHIFT 8 +#define HW_PWR_MAX_MULT 3 +#define MAX_HW_PSTATE 8 /* hw pstate supports up to 8 */ +#define MSR_PSTATE_DEF_BASE 0xc0010064 /* base of Pstate MSRs */ +#define MSR_PSTATE_STATUS 0xc0010063 /* Pstate Status MSR */ +#define MSR_PSTATE_CTRL 0xc0010062 /* Pstate control MSR */ + +/* define the two driver architectures */ +#define CPU_OPTERON 0 +#define CPU_HW_PSTATE 1 + + /* * There are restrictions frequencies have to follow: * - only 1 entry in the low fid table ( <=1.4GHz ) @@ -138,7 +169,9 @@ struct powernow_k8_data { #define MVS_MASK 3 #define VST_MASK 0x7f #define VID_MASK 0x1f -#define FID_MASK 0x3f +#define FID_MASK 0x1f +#define EXT_VID_MASK 0x3f +#define EXT_FID_MASK 0x3f /* @@ -182,6 +215,9 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); +static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); +static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); + #ifdef CONFIG_SMP static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) { diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index 4535ca0fe0cf..31c3a5baaa7f 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -2,19 +2,15 @@ * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium * M (part of the Centrino chipset). * + * Since the original Pentium M, most new Intel CPUs support Enhanced + * SpeedStep. 
+ * * Despite the "SpeedStep" in the name, this is almost entirely unlike * traditional SpeedStep. * * Modelled on speedstep.c * * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org> - * - * WARNING WARNING WARNING - * - * This driver manipulates the PERF_CTL MSR, which is only somewhat - * documented. While it seems to work on my laptop, it has not been - * tested anywhere else, and it may not work for you, do strange - * things or simply crash. */ #include <linux/kernel.h> @@ -36,7 +32,7 @@ #include <asm/cpufeature.h> #define PFX "speedstep-centrino: " -#define MAINTAINER "Jeremy Fitzhardinge <jeremy@goop.org>" +#define MAINTAINER "cpufreq@lists.linux.org.uk" #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) @@ -250,7 +246,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy) if (model->cpu_id == NULL) { /* No match at all */ - dprintk(KERN_INFO PFX "no support for CPU model \"%s\": " + dprintk("no support for CPU model \"%s\": " "send /proc/cpuinfo to " MAINTAINER "\n", cpu->x86_model_id); return -ENOENT; @@ -258,10 +254,10 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy) if (model->op_points == NULL) { /* Matched a non-match */ - dprintk(KERN_INFO PFX "no table support for CPU model \"%s\"\n", + dprintk("no table support for CPU model \"%s\"\n", cpu->x86_model_id); #ifndef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI - dprintk(KERN_INFO PFX "try compiling with CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI enabled\n"); + dprintk("try compiling with CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI enabled\n"); #endif return -ENOENT; } @@ -400,7 +396,7 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) /* register with ACPI core */ if (acpi_processor_register_performance(p, cpu)) { - dprintk(KERN_INFO PFX "obtaining ACPI data failed\n"); + dprintk(PFX "obtaining ACPI data failed\n"); return -EIO; } policy->cpus = p->shared_cpu_map; @@ -499,7 +495,7 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) kfree(centrino_model[cpu]); err_unreg: acpi_processor_unregister_performance(p, cpu); - dprintk(KERN_INFO PFX "invalid ACPI data\n"); + dprintk(PFX "invalid ACPI data\n"); return (result); } #else @@ -534,7 +530,7 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) centrino_cpu[policy->cpu] = &cpu_ids[i]; if (!centrino_cpu[policy->cpu]) { - dprintk(KERN_INFO PFX "found unsupported CPU with " + dprintk("found unsupported CPU with " "Enhanced SpeedStep: send /proc/cpuinfo to " MAINTAINER "\n"); return -ENODEV; diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index 00f2e058797c..fc32c8028e24 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c @@ -184,7 +184,7 @@ static void __init geode_configure(void) #ifdef CONFIG_PCI -static struct pci_device_id cyrix_55x0[] = { +static struct pci_device_id __initdata cyrix_55x0[] = { { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) }, { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) }, { }, @@ -272,14 +272,15 @@ static void __init init_cyrix(struct cpuinfo_x86 *c) printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); isa_dma_bridge_buggy = 2; -#endif - c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ - + + /* * The 5510/5520 companion chips have a funky PIT. 
*/ if (pci_dev_present(cyrix_55x0)) pit_latch_buggy = 1; +#endif + c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ /* GXm supports extended cpuid levels 'ala' AMD */ if (c->cpuid_level == 2) { diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index f94cdb7aca50..a19fcb262dbb 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -52,7 +52,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* VIA/Cyrix/Centaur-defined */ NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, diff --git a/arch/i386/kernel/i387.c b/arch/i386/kernel/i387.c index d75524758daf..c4351972d9af 100644 --- a/arch/i386/kernel/i387.c +++ b/arch/i386/kernel/i387.c @@ -25,7 +25,7 @@ #define HAVE_HWFP 1 #endif -static unsigned long mxcsr_feature_mask = 0xffffffff; +static unsigned long mxcsr_feature_mask __read_mostly = 0xffffffff; void mxcsr_feature_mask_init(void) { diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index 323ef8ab3244..b7636b96e104 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c @@ -271,8 +271,8 @@ static int i8259A_shutdown(struct sys_device *dev) * the kernel initialization code can get it * out of. */ - outb(0xff, 0x21); /* mask all of 8259A-1 */ - outb(0xff, 0xA1); /* mask all of 8259A-1 */ + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ return 0; } diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index d70f2ade5cde..a62df3e764c5 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -267,7 +267,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) # include <linux/slab.h> /* kmalloc() */ # include <linux/timer.h> /* time_after() */ -# ifdef CONFIG_BALANCED_IRQ_DEBUG +#ifdef CONFIG_BALANCED_IRQ_DEBUG # define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0) # define Dprintk(x...) do { TDprintk(x); } while (0) # else @@ -275,10 +275,15 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) # define Dprintk(x...) # endif - #define IRQBALANCE_CHECK_ARCH -999 -static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH; -static int physical_balance = 0; +#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) +#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) +#define BALANCED_IRQ_MORE_DELTA (HZ/10) +#define BALANCED_IRQ_LESS_DELTA (HZ) + +static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH; +static int physical_balance __read_mostly; +static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; static struct irq_cpu_info { unsigned long * last_irq; @@ -297,12 +302,14 @@ static struct irq_cpu_info { #define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) -#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) -#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) -#define BALANCED_IRQ_MORE_DELTA (HZ/10) -#define BALANCED_IRQ_LESS_DELTA (HZ) +static cpumask_t balance_irq_affinity[NR_IRQS] = { + [0 ... 
NR_IRQS-1] = CPU_MASK_ALL +}; -static long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL; +void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) +{ + balance_irq_affinity[irq] = mask; +} static unsigned long move(int curr_cpu, cpumask_t allowed_mask, unsigned long now, int direction) @@ -340,7 +347,7 @@ static inline void balance_irq(int cpu, int irq) if (irqbalance_disabled) return; - cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]); + cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); new_cpu = move(cpu, allowed_mask, now, 1); if (cpu != new_cpu) { set_pending_irq(irq, cpumask_of_cpu(new_cpu)); @@ -529,7 +536,9 @@ tryanotherirq: } } - cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]); + cpus_and(allowed_mask, + cpu_online_map, + balance_irq_affinity[selected_irq]); target_cpu_mask = cpumask_of_cpu(min_loaded); cpus_and(tmp, target_cpu_mask, allowed_mask); diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index f3a9c78c4a24..248e922ee13a 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -42,8 +42,8 @@ union irq_ctx { u32 stack[THREAD_SIZE/sizeof(u32)]; }; -static union irq_ctx *hardirq_ctx[NR_CPUS]; -static union irq_ctx *softirq_ctx[NR_CPUS]; +static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; +static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; #endif /* diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 38806f427849..395a9a6dff88 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -607,7 +607,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; - if (args->regs && user_mode(args->regs)) + if (args->regs && user_mode_vm(args->regs)) return ret; switch (val) { diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index e7c138f66c5a..0a865889b2a9 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -91,7 +91,10 @@ MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>"); MODULE_LICENSE("GPL"); -#define MICROCODE_VERSION "1.14" +static int verbose; +module_param(verbose, int, 0644); + +#define MICROCODE_VERSION "1.14a" #define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ #define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */ @@ -122,14 +125,15 @@ static unsigned int user_buffer_size; /* it's size */ typedef enum mc_error_code { MC_SUCCESS = 0, - MC_NOTFOUND = 1, - MC_MARKED = 2, - MC_ALLOCATED = 3, + MC_IGNORED = 1, + MC_NOTFOUND = 2, + MC_MARKED = 3, + MC_ALLOCATED = 4, } mc_error_code_t; static struct ucode_cpu_info { unsigned int sig; - unsigned int pf; + unsigned int pf, orig_pf; unsigned int rev; unsigned int cksum; mc_error_code_t err; @@ -164,6 +168,7 @@ static void collect_cpu_info (void *unused) rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); uci->pf = 1 << ((val[1] >> 18) & 7); } + uci->orig_pf = uci->pf; } wrmsr(MSR_IA32_UCODE_REV, 0, 0); @@ -197,21 +202,34 @@ static inline void mark_microcode_update (int cpu_num, microcode_header_t *mc_he pr_debug(" Checksum 0x%x\n", cksum); if (mc_header->rev < uci->rev) { - printk(KERN_ERR "microcode: CPU%d not 'upgrading' to earlier revision" - " 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev); - goto out; + if (uci->err == MC_NOTFOUND) { + uci->err = MC_IGNORED; + uci->cksum = mc_header->rev; + } else if (uci->err == MC_IGNORED && uci->cksum < mc_header->rev) + uci->cksum = 
mc_header->rev; } else if (mc_header->rev == uci->rev) { - /* notify the caller of success on this cpu */ - uci->err = MC_SUCCESS; - goto out; + if (uci->err < MC_MARKED) { + /* notify the caller of success on this cpu */ + uci->err = MC_SUCCESS; + } + } else if (uci->err != MC_ALLOCATED || mc_header->rev > uci->mc->hdr.rev) { + pr_debug("microcode: CPU%d found a matching microcode update with " + " revision 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev); + uci->cksum = cksum; + uci->pf = pf; /* keep the original mc pf for cksum calculation */ + uci->err = MC_MARKED; /* found the match */ + for_each_online_cpu(cpu_num) { + if (ucode_cpu_info + cpu_num != uci + && ucode_cpu_info[cpu_num].mc == uci->mc) { + uci->mc = NULL; + break; + } + } + if (uci->mc != NULL) { + vfree(uci->mc); + uci->mc = NULL; + } } - - pr_debug("microcode: CPU%d found a matching microcode update with " - " revision 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev); - uci->cksum = cksum; - uci->pf = pf; /* keep the original mc pf for cksum calculation */ - uci->err = MC_MARKED; /* found the match */ -out: return; } @@ -253,10 +271,8 @@ static int find_matching_ucodes (void) for_each_online_cpu(cpu_num) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - if (uci->err != MC_NOTFOUND) /* already found a match or not an online cpu*/ - continue; - if (sigmatch(mc_header.sig, uci->sig, mc_header.pf, uci->pf)) + if (sigmatch(mc_header.sig, uci->sig, mc_header.pf, uci->orig_pf)) mark_microcode_update(cpu_num, &mc_header, mc_header.sig, mc_header.pf, mc_header.cksum); } @@ -295,9 +311,8 @@ static int find_matching_ucodes (void) } for_each_online_cpu(cpu_num) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - if (uci->err != MC_NOTFOUND) /* already found a match or not an online cpu*/ - continue; - if (sigmatch(ext_sig.sig, uci->sig, ext_sig.pf, uci->pf)) { + + if (sigmatch(ext_sig.sig, uci->sig, ext_sig.pf, uci->orig_pf)) { mark_microcode_update(cpu_num, &mc_header, ext_sig.sig, ext_sig.pf, ext_sig.cksum); } } @@ -368,6 +383,13 @@ static void do_update_one (void * unused) struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; if (uci->mc == NULL) { + if (verbose) { + if (uci->err == MC_SUCCESS) + printk(KERN_INFO "microcode: CPU%d already at revision 0x%x\n", + cpu_num, uci->rev); + else + printk(KERN_INFO "microcode: No new microcode data for CPU%d\n", cpu_num); + } return; } @@ -426,6 +448,9 @@ out_free: ucode_cpu_info[j].mc = NULL; } } + if (ucode_cpu_info[i].err == MC_IGNORED && verbose) + printk(KERN_WARNING "microcode: CPU%d not 'upgrading' to earlier revision" + " 0x%x (current=0x%x)\n", i, ucode_cpu_info[i].cksum, ucode_cpu_info[i].rev); } out: return error; diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index dd6b0e3386ce..e6023970aa40 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -48,6 +48,7 @@ #include <linux/crash_dump.h> #include <linux/dmi.h> #include <linux/pfn.h> +#include <linux/suspend.h> #include <video/edid.h> @@ -1434,6 +1435,111 @@ static void set_mca_bus(int x) static void set_mca_bus(int x) { } #endif +#ifdef CONFIG_SOFTWARE_SUSPEND +static void __init mark_nosave_page_range(unsigned long start, unsigned long end) +{ + struct page *page; + while (start <= end) { + page = pfn_to_page(start); + SetPageNosave(page); + start++; + } +} + +static void __init e820_nosave_reserved_pages(void) +{ + int i; + unsigned long r_start = 0, r_end = 0; + + /* Assume e820 map is sorted */ + for (i = 0; i < e820.nr_map; i++) { + struct 
e820entry *ei = &e820.map[i]; + unsigned long start, end; + + start = PFN_DOWN(ei->addr); + end = PFN_UP(ei->addr + ei->size); + if (start >= end) + continue; + if (ei->type == E820_RESERVED) + continue; + r_end = start; + /* + * Highmem 'Reserved' pages are marked as reserved, swsusp + * will not save/restore them, so we ignore these pages here. + */ + if (r_end > max_low_pfn) + r_end = max_low_pfn; + if (r_end > r_start) + mark_nosave_page_range(r_start, r_end-1); + if (r_end >= max_low_pfn) + break; + r_start = end; + } +} + +static void __init e820_save_acpi_pages(void) +{ + int i; + + /* Assume e820 map is sorted */ + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + unsigned long start, end; + + start = ei->addr; + end = ei->addr + ei->size; + if (start >= end) + continue; + if (ei->type != E820_ACPI && ei->type != E820_NVS) + continue; + /* + * If the region is below max_low_pfn, it will be + * saved/restored by swsusp follow 'RAM' type. + */ + if (start < (max_low_pfn << PAGE_SHIFT)) + start = max_low_pfn << PAGE_SHIFT; + /* + * Highmem pages (ACPI NVS/Data) are reserved, but swsusp + * highmem save/restore will not save/restore them. We marked + * them as arch saveable pages here + */ + if (end > start) + swsusp_add_arch_pages(start, end); + } +} + +extern char __start_rodata, __end_rodata; +/* + * BIOS reserved region/hole - no save/restore + * ACPI NVS - save/restore + * ACPI Data - this is a little tricky, the mem could be used by OS after OS + * reads tables from the region, but anyway save/restore the memory hasn't any + * side effect and Linux runtime module load/unload might use it. + * kernel rodata - no save/restore (kernel rodata isn't changed) + */ +static int __init mark_nosave_pages(void) +{ + unsigned long pfn_start, pfn_end; + + /* FIXME: provide a version for efi BIOS */ + if (efi_enabled) + return 0; + /* BIOS reserved regions & holes */ + e820_nosave_reserved_pages(); + + /* kernel rodata */ + pfn_start = PFN_UP(virt_to_phys(&__start_rodata)); + pfn_end = PFN_DOWN(virt_to_phys(&__end_rodata)); + mark_nosave_page_range(pfn_start, pfn_end-1); + + /* record ACPI Data/NVS as saveable */ + e820_save_acpi_pages(); + + return 0; +} +core_initcall(mark_nosave_pages); +#endif + /* * Determine if we were loaded by an EFI loader. 
If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c index 52b3ed5d2cb5..989c85255dbe 100644 --- a/arch/i386/kernel/srat.c +++ b/arch/i386/kernel/srat.c @@ -39,7 +39,6 @@ #define NODE_ARRAY_OFFSET(x) ((x) % 8) /* 8 bits/char */ #define BMAP_SET(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit)) #define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit))) -#define MAX_PXM_DOMAINS 256 /* 1 byte and no promises about values */ /* bitmap length; _PXM is at most 255 */ #define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ @@ -213,19 +212,11 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c node_end_pfn[nid] = memory_chunk->end_pfn; } -static u8 pxm_to_nid_map[MAX_PXM_DOMAINS];/* _PXM to logical node ID map */ - -int pxm_to_node(int pxm) -{ - return pxm_to_nid_map[pxm]; -} - /* Parse the ACPI Static Resource Affinity Table */ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) { u8 *start, *end, *p; int i, j, nid; - u8 nid_to_pxm_map[MAX_NUMNODES];/* logical node ID to _PXM map */ start = (u8 *)(&(sratp->reserved) + 1); /* skip header */ p = start; @@ -235,10 +226,6 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) memset(node_memory_chunk, 0, sizeof(node_memory_chunk)); memset(zholes_size, 0, sizeof(zholes_size)); - /* -1 in these maps means not available */ - memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map)); - memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map)); - num_memory_chunks = 0; while (p < end) { switch (*p) { @@ -278,9 +265,7 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) nodes_clear(node_online_map); for (i = 0; i < MAX_PXM_DOMAINS; i++) { if (BMAP_TEST(pxm_bitmap, i)) { - nid = num_online_nodes(); - pxm_to_nid_map[i] = nid; - nid_to_pxm_map[nid] = i; + int nid = acpi_map_pxm_to_node(i); node_set_online(nid); } } @@ -288,7 +273,7 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) /* set cnode id in memory chunk structure */ for (i = 0; i < num_memory_chunks; i++) - node_memory_chunk[i].nid = pxm_to_nid_map[node_memory_chunk[i].pxm]; + node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm); printk("pxm bitmap: "); for (i = 0; i < sizeof(pxm_bitmap); i++) { diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index af56987f69b0..dd63d4775398 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -316,3 +316,4 @@ ENTRY(sys_call_table) .long sys_sync_file_range .long sys_tee /* 315 */ .long sys_vmsplice + .long sys_move_pages diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 0e498369f35e..dcc14477af1f 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -149,6 +149,12 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, while (valid_stack_ptr(tinfo, (void *)ebp)) { addr = *(unsigned long *)(ebp + 4); printed = print_addr_and_symbol(addr, log_lvl, printed); + /* + * break out of recursive entries (such as + * end_of_stack_stop_unwind_function): + */ + if (ebp == *(unsigned long *)ebp) + break; ebp = *(unsigned long *)ebp; } #else @@ -268,8 +274,9 @@ void show_registers(struct pt_regs *regs) regs->esi, regs->edi, regs->ebp, esp); printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", regs->xds & 0xffff, regs->xes & 
diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
index 4cf981d70f45..6979297ce278 100644
--- a/arch/i386/lib/usercopy.c
+++ b/arch/i386/lib/usercopy.c
@@ -425,15 +425,121 @@ __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size)
 	: "eax", "edx", "memory");
 	return size;
 }
+
+/*
+ * Non-temporal-hint version of __copy_user_zeroing_intel.  It is cache aware.
+ * hyoshiok@miraclelinux.com
+ */
+
+static unsigned long __copy_user_zeroing_intel_nocache(void *to,
+				const void __user *from, unsigned long size)
+{
+	int d0, d1;
+
+	__asm__ __volatile__(
+	" .align 2,0x90\n"
+	"0: movl 32(%4), %%eax\n"
+	" cmpl $67, %0\n"
+	" jbe 2f\n"
+	"1: movl 64(%4), %%eax\n"
+	" .align 2,0x90\n"
+	"2: movl 0(%4), %%eax\n"
+	"21: movl 4(%4), %%edx\n"
+	" movnti %%eax, 0(%3)\n"
+	" movnti %%edx, 4(%3)\n"
+	"3: movl 8(%4), %%eax\n"
+	"31: movl 12(%4),%%edx\n"
+	" movnti %%eax, 8(%3)\n"
+	" movnti %%edx, 12(%3)\n"
+	"4: movl 16(%4), %%eax\n"
+	"41: movl 20(%4), %%edx\n"
+	" movnti %%eax, 16(%3)\n"
+	" movnti %%edx, 20(%3)\n"
+	"10: movl 24(%4), %%eax\n"
+	"51: movl 28(%4), %%edx\n"
+	" movnti %%eax, 24(%3)\n"
+	" movnti %%edx, 28(%3)\n"
+	"11: movl 32(%4), %%eax\n"
+	"61: movl 36(%4), %%edx\n"
+	" movnti %%eax, 32(%3)\n"
+	" movnti %%edx, 36(%3)\n"
+	"12: movl 40(%4), %%eax\n"
+	"71: movl 44(%4), %%edx\n"
+	" movnti %%eax, 40(%3)\n"
+	" movnti %%edx, 44(%3)\n"
+	"13: movl 48(%4), %%eax\n"
+	"81: movl 52(%4), %%edx\n"
+	" movnti %%eax, 48(%3)\n"
+	" movnti %%edx, 52(%3)\n"
+	"14: movl 56(%4), %%eax\n"
+	"91: movl 60(%4), %%edx\n"
+	" movnti %%eax, 56(%3)\n"
+	" movnti %%edx, 60(%3)\n"
+	" addl $-64, %0\n"
+	" addl $64, %4\n"
+	" addl $64, %3\n"
+	" cmpl $63, %0\n"
+	" ja 0b\n"
+	" sfence \n"
+	"5: movl %0, %%eax\n"
+	" shrl $2, %0\n"
+	" andl $3, %%eax\n"
+	" cld\n"
+	"6: rep; movsl\n"
+	" movl %%eax,%0\n"
+	"7: rep; movsb\n"
+	"8:\n"
+	".section .fixup,\"ax\"\n"
+	"9: lea 0(%%eax,%0,4),%0\n"
+	"16: pushl %0\n"
+	" pushl %%eax\n"
+	" xorl %%eax,%%eax\n"
+	" rep; stosb\n"
+	" popl %%eax\n"
+	" popl %0\n"
+	" jmp 8b\n"
+	".previous\n"
+	".section __ex_table,\"a\"\n"
+	" .align 4\n"
+	" .long 0b,16b\n"
+	" .long 1b,16b\n"
+	" .long 2b,16b\n"
+	" .long 21b,16b\n"
+	" .long 3b,16b\n"
+	" .long 31b,16b\n"
+	" .long 4b,16b\n"
+	" .long 41b,16b\n"
+	" .long 10b,16b\n"
+	" .long 51b,16b\n"
+	" .long 11b,16b\n"
+	" .long 61b,16b\n"
+	" .long 12b,16b\n"
+	" .long 71b,16b\n"
+	" .long 13b,16b\n"
+	" .long 81b,16b\n"
+	" .long 14b,16b\n"
+	" .long 91b,16b\n"
+	" .long 6b,9b\n"
+	" .long 7b,16b\n"
+	".previous"
+	: "=&c"(size), "=&D" (d0), "=&S" (d1)
+	: "1"(to), "2"(from), "0"(size)
+	: "eax", "edx", "memory");
+	return size;
+}
+
 #else
+
 /*
  * Leave these declared but undefined.  They should not be any references to
  * them
  */
-unsigned long
-__copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size);
-unsigned long
-__copy_user_intel(void __user *to, const void *from, unsigned long size);
+unsigned long __copy_user_zeroing_intel(void *to, const void __user *from,
+					unsigned long size);
+unsigned long __copy_user_intel(void __user *to, const void *from,
+					unsigned long size);
+unsigned long __copy_user_zeroing_intel_nocache(void *to,
+				const void __user *from, unsigned long size);
 #endif /* CONFIG_X86_INTEL_USERCOPY */

 /* Generic arbitrary sized copy. */
@@ -515,8 +621,8 @@ do { \
 	: "memory"); \
 } while (0)

-
-unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n)
+unsigned long __copy_to_user_ll(void __user *to, const void *from,
+				unsigned long n)
 {
 	BUG_ON((long) n < 0);
 #ifndef CONFIG_X86_WP_WORKS_OK
@@ -576,8 +682,8 @@ survive:
 }
 EXPORT_SYMBOL(__copy_to_user_ll);

-unsigned long
-__copy_from_user_ll(void *to, const void __user *from, unsigned long n)
+unsigned long __copy_from_user_ll(void *to, const void __user *from,
+				unsigned long n)
 {
 	BUG_ON((long)n < 0);
 	if (movsl_is_ok(to, from, n))
@@ -588,6 +694,21 @@ __copy_from_user_ll(void *to, const void __user *from, unsigned long n)
 }
 EXPORT_SYMBOL(__copy_from_user_ll);

+unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
+				unsigned long n)
+{
+	BUG_ON((long)n < 0);
+#ifdef CONFIG_X86_INTEL_USERCOPY
+	if (n > 64 && cpu_has_xmm2)
+		n = __copy_user_zeroing_intel_nocache(to, from, n);
+	else
+		__copy_user_zeroing(to, from, n);
+#else
+	__copy_user_zeroing(to, from, n);
+#endif
+	return n;
+}
+
 /**
  * copy_to_user: - Copy a block of data into user space.
  * @to: Destination address, in user space.
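Editorial aside, not part of the patch: movnti stores write around the cache, which is why __copy_from_user_ll_nocache() only takes the new path for copies larger than 64 bytes on CPUs with SSE2 (cpu_has_xmm2), and why the assembly ends with sfence. A rough user-space analogue using SSE2 intrinsics instead of the hand-written assembly above; the function name, threshold handling and fallback are illustrative only:

#include <emmintrin.h>	/* SSE2: _mm_stream_si128, _mm_sfence */
#include <stdint.h>
#include <string.h>

/* Copy n bytes with non-temporal stores when the destination is 16-byte
 * aligned and the copy is large; otherwise fall back to a cached memcpy.
 * The 64-byte threshold mirrors the "n > 64" check in the patch. */
static void copy_nocache(void *dst, const void *src, size_t n)
{
	if (n <= 64 || ((uintptr_t)dst & 15)) {
		memcpy(dst, src, n);
		return;
	}
	while (n >= 16) {
		__m128i v;

		memcpy(&v, src, 16);			/* unaligned-safe load */
		_mm_stream_si128((__m128i *)dst, v);	/* store, bypassing the cache */
		dst = (char *)dst + 16;
		src = (const char *)src + 16;
		n -= 16;
	}
	_mm_sfence();					/* order the streaming stores */
	if (n)
		memcpy(dst, src, n);			/* tail bytes */
}

The design point is the same as in the patch: for large copies whose destination will not be read back soon, bypassing the cache avoids evicting useful data, while small copies remain cheaper through the normal cached path.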
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index 7f0fcf219a26..bd6fe96cc16d 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -77,12 +77,15 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
 	unsigned seg = regs->xcs & 0xffff;
 	u32 seg_ar, seg_limit, base, *desc;

+	/* Unlikely, but must come before segment checks. */
+	if (unlikely(regs->eflags & VM_MASK)) {
+		base = seg << 4;
+		*eip_limit = base + 0xffff;
+		return base + (eip & 0xffff);
+	}
+
 	/* The standard kernel/user address space limit. */
 	*eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg;
-
-	/* Unlikely, but must come before segment checks. */
-	if (unlikely((regs->eflags & VM_MASK) != 0))
-		return eip + (seg << 4);

 	/* By far the most common cases. */
 	if (likely(seg == __USER_CS || seg == __KERNEL_CS))
@@ -380,12 +383,12 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
 			goto bad_area;
 		if (error_code & 4) {
 			/*
-			 * accessing the stack below %esp is always a bug.
-			 * The "+ 32" is there due to some instructions (like
-			 * pusha) doing post-decrement on the stack and that
-			 * doesn't show up until later..
+			 * Accessing the stack below %esp is always a bug.
+			 * The large cushion allows instructions like enter
+			 * and pusha to work. ("enter $65535,$31" pushes
+			 * 32 pointers and then decrements %esp by 65535.)
 			 */
-			if (address + 32 < regs->esp)
+			if (address + 65536 + 32 * sizeof(unsigned long) < regs->esp)
 				goto bad_area;
 		}
 		if (expand_stack(vma, address))
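Editorial aside, not part of the patch: the widened do_page_fault() cushion comes from the worst case of the i386 enter instruction; "enter $65535,$31" pushes 32 frame pointers and then drops %esp by 65535, all touching memory below the not-yet-updated %esp. A throwaway sketch of the same comparison with made-up addresses:

#include <stdio.h>

int main(void)
{
	unsigned long esp = 0xbffff000;		/* example user stack pointer */
	unsigned long cushion = 65536 + 32 * sizeof(unsigned long);
	unsigned long fault1 = esp - 100;	/* could be enter/pusha at work */
	unsigned long fault2 = esp - 0x20000;	/* far below: treat as a bug */

	printf("cushion = %lu bytes\n", cushion);
	printf("fault just below esp:  %s\n",
	       fault1 + cushion < esp ? "bad_area" : "allowed");
	printf("fault 128KB below esp: %s\n",
	       fault2 + cushion < esp ? "bad_area" : "allowed");
	return 0;
}

Anything farther below %esp than the cushion cannot have been produced by enter or pusha, so it is still rejected as a bad access.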
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 3df1371d4520..bf19513f0cea 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -29,6 +29,7 @@
 #include <linux/efi.h>
 #include <linux/memory_hotplug.h>
 #include <linux/initrd.h>
+#include <linux/cpumask.h>

 #include <asm/processor.h>
 #include <asm/system.h>
@@ -384,7 +385,7 @@ static void __init pagetable_init (void)
 #endif
 }

-#ifdef CONFIG_SOFTWARE_SUSPEND
+#if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)
 /*
  * Swap suspend & friends need this for resume because things like the intel-agp
  * driver might have split up a kernel 4MB mapping.
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index 92c3d9f0e731..0887b34bc59b 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -209,19 +209,19 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 }

 void global_flush_tlb(void)
-{
-	LIST_HEAD(l);
+{
+	struct list_head l;
 	struct page *pg, *next;

 	BUG_ON(irqs_disabled());

 	spin_lock_irq(&cpa_lock);
-	list_splice_init(&df_list, &l);
+	list_replace_init(&df_list, &l);
 	spin_unlock_irq(&cpa_lock);
 	flush_map();
 	list_for_each_entry_safe(pg, next, &l, lru)
 		__free_page(pg);
-}
+}

 #ifdef CONFIG_DEBUG_PAGEALLOC
 void kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c
index dbece776c5b2..c624b61e1104 100644
--- a/arch/i386/pci/common.c
+++ b/arch/i386/pci/common.c
@@ -288,6 +288,7 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)

 void pcibios_disable_device (struct pci_dev *dev)
 {
+	pcibios_disable_resources(dev);
 	if (pcibios_disable_irq)
 		pcibios_disable_irq(dev);
 }
diff --git a/arch/i386/pci/i386.c b/arch/i386/pci/i386.c
index ed2c8c899bd3..7852827a599b 100644
--- a/arch/i386/pci/i386.c
+++ b/arch/i386/pci/i386.c
@@ -242,6 +242,15 @@ int pcibios_enable_resources(struct pci_dev *dev, int mask)
 	return 0;
 }

+void pcibios_disable_resources(struct pci_dev *dev)
+{
+	u16 cmd;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
+	pci_write_config_word(dev, PCI_COMMAND, cmd);
+}
+
 /*
  * If we set up a device for bus mastering, we need to check the latency
  * timer as certain crappy BIOSes forget to set it properly.
diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c
index 06dab00aaadc..8ce69508f3c7 100644
--- a/arch/i386/pci/irq.c
+++ b/arch/i386/pci/irq.c
@@ -198,14 +198,14 @@ static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigne
  */
 static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
 {
-	static unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
+	static const unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };

 	return irqmap[read_config_nybble(router, 0x48, pirq-1)];
 }

 static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
 {
-	static unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
+	static const unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
 	unsigned int val = irqmap[irq];

 	if (val) {
@@ -256,13 +256,13 @@ static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i
  */
 static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
 {
-	static unsigned int pirqmap[4] = { 3, 2, 5, 1 };
+	static const unsigned int pirqmap[4] = { 3, 2, 5, 1 };
 	return read_config_nybble(router, 0x55, pirqmap[pirq-1]);
 }

 static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
 {
-	static unsigned int pirqmap[4] = { 3, 2, 5, 1 };
+	static const unsigned int pirqmap[4] = { 3, 2, 5, 1 };
 	write_config_nybble(router, 0x55, pirqmap[pirq-1], irq);
 	return 1;
 }
@@ -274,13 +274,13 @@ static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq
  */
 static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
 {
-	static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+	static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
 	return read_config_nybble(router,0x43, pirqmap[pirq-1]);
 }

 static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
 {
-	static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+	static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
 	write_config_nybble(router, 0x43, pirqmap[pirq-1], irq);
 	return 1;
 }
@@ -505,7 +505,7 @@ static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq,

 static __init int intel_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
 {
-	static struct pci_device_id pirq_440gx[] = {
+	static struct pci_device_id __initdata pirq_440gx[] = {
 		{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0) },
 		{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2) },
 		{ },
@@ -880,6 +880,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
 	    ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask)) ) {
 		DBG(" -> got IRQ %d\n", irq);
 		msg = "Found";
+		eisa_set_level_irq(irq);
 	} else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
 		DBG(" -> assigning IRQ %d", newirq);
 		if (r->set(pirq_router_dev, dev, pirq, newirq)) {
diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c
index 6b1ea0c9a570..e545b0992c48 100644
--- a/arch/i386/pci/mmconfig.c
+++ b/arch/i386/pci/mmconfig.c
@@ -15,7 +15,9 @@
 #include <asm/e820.h>
 #include "pci.h"

-#define MMCONFIG_APER_SIZE (256*1024*1024)
+/* aperture is up to 256MB but BIOS may reserve less */
+#define MMCONFIG_APER_MIN (2 * 1024*1024)
+#define MMCONFIG_APER_MAX (256 * 1024*1024)

 /* Assume systems with more busses have correct MCFG */
 #define MAX_CHECK_BUS 16
@@ -197,9 +199,10 @@ void __init pci_mmcfg_init(void)
 		return;

 	if (!e820_all_mapped(pci_mmcfg_config[0].base_address,
-			pci_mmcfg_config[0].base_address + MMCONFIG_APER_SIZE,
+			pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
 			E820_RESERVED)) {
-		printk(KERN_ERR "PCI: BIOS Bug: MCFG area is not E820-reserved\n");
+		printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
+				pci_mmcfg_config[0].base_address);
 		printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
 		return;
 	}
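Editorial aside, not part of the patch: pci_mmcfg_init() now only insists that the first 2MB (MMCONFIG_APER_MIN) of the MCFG aperture be E820-reserved, because some BIOSes reserve less than the full 256MB. The kernel's e820_all_mapped() helper is not shown in this patch; the sketch below only approximates the kind of "range fully covered by reserved entries" test it stands for, with hypothetical names and a made-up map:

#include <stdio.h>

struct range { unsigned long long start, end; };

/* Return 1 when [start, end) is fully covered by the (sorted, non-overlapping)
 * reserved ranges; this is the shape of check the MCFG sanity test relies on. */
static int all_reserved(const struct range *res, int n,
			unsigned long long start, unsigned long long end)
{
	int i;

	for (i = 0; i < n && start < end; i++) {
		if (res[i].start <= start && start < res[i].end)
			start = res[i].end;	/* advance past this entry */
	}
	return start >= end;
}

int main(void)
{
	/* BIOS reserved only 32MB at the MCFG base address. */
	struct range reserved[] = { { 0xe0000000ULL, 0xe2000000ULL } };
	unsigned long long base = 0xe0000000ULL;

	printf("first 2MB reserved:  %d\n",
	       all_reserved(reserved, 1, base, base + (2ULL << 20)));
	printf("full 256MB reserved: %d\n",
	       all_reserved(reserved, 1, base, base + (256ULL << 20)));
	return 0;
}

Checking only the 2MB minimum keeps MMCONFIG usable on such a BIOS, where a full-aperture check would have disabled it.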
diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h
index 12035e29108b..12bf3d8dda29 100644
--- a/arch/i386/pci/pci.h
+++ b/arch/i386/pci/pci.h
@@ -35,6 +35,7 @@ extern unsigned int pcibios_max_latency;

 void pcibios_resource_survey(void);
 int pcibios_enable_resources(struct pci_dev *, int);
+void pcibios_disable_resources(struct pci_dev *);

 /* pci-pc.c */

diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c
index 79b2370c7fac..e6517915fe3e 100644
--- a/arch/i386/power/cpu.c
+++ b/arch/i386/power/cpu.c
@@ -10,6 +10,8 @@
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/suspend.h>
+#include <asm/mtrr.h>
+#include <asm/mce.h>

 static struct saved_context saved_context;