Diffstat (limited to 'arch')
120 files changed, 1114 insertions, 1933 deletions
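A pattern that recurs across the alpha, arm, mips, powerpc, sh and sparc hunks below is the conversion of PMU setup to the new three-argument perf_pmu_register() plus a move from arch_initcall() to early_initcall(). The following is a minimal sketch of the converted init path, not any one architecture's actual code: example_pmu and the stubbed supported_cpu() check stand in for the per-arch details.

#include <linux/init.h>
#include <linux/perf_event.h>

static struct pmu example_pmu;	/* per-arch callbacks omitted in this sketch */

/* Stand-in for the per-arch capability check (cf. the alpha hunk). */
static int supported_cpu(void)
{
	return 1;
}

/*
 * PMU setup now runs as an early_initcall() and so must return int.
 * The PMU is registered under the name "cpu" with the raw event type,
 * matching the new perf_pmu_register(pmu, name, type) signature.
 */
static int __init init_hw_perf_events(void)
{
	if (!supported_cpu())
		return 0;	/* not an error: simply no PMU support */

	perf_pmu_register(&example_pmu, "cpu", PERF_TYPE_RAW);
	return 0;
}
early_initcall(init_hw_perf_events);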
diff --git a/arch/alpha/include/asm/perf_event.h b/arch/alpha/include/asm/perf_event.h index fe792ca818f6..5996e7a6757e 100644 --- a/arch/alpha/include/asm/perf_event.h +++ b/arch/alpha/include/asm/perf_event.h @@ -1,10 +1,4 @@ #ifndef __ASM_ALPHA_PERF_EVENT_H #define __ASM_ALPHA_PERF_EVENT_H -#ifdef CONFIG_PERF_EVENTS -extern void init_hw_perf_events(void); -#else -static inline void init_hw_perf_events(void) { } -#endif - #endif /* __ASM_ALPHA_PERF_EVENT_H */ diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index 5f77afb88e89..4c8bb374eb0a 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -112,8 +112,6 @@ init_IRQ(void) wrent(entInt, 0); alpha_mv.init_irq(); - - init_hw_perf_events(); } /* diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 1cc49683fb69..90561c45e7d8 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -14,6 +14,7 @@ #include <linux/kernel.h> #include <linux/kdebug.h> #include <linux/mutex.h> +#include <linux/init.h> #include <asm/hwrpb.h> #include <asm/atomic.h> @@ -863,13 +864,13 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, /* * Init call to initialise performance events at kernel startup. */ -void __init init_hw_perf_events(void) +int __init init_hw_perf_events(void) { pr_info("Performance events: "); if (!supported_cpu()) { pr_cont("No support for your CPU.\n"); - return; + return 0; } pr_cont("Supported CPU type!\n"); @@ -881,6 +882,8 @@ void __init init_hw_perf_events(void) /* And set up PMU specification */ alpha_pmu = &ev67_pmu; - perf_pmu_register(&pmu); -} + perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); + return 0; +} +early_initcall(init_hw_perf_events); diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 07a50357492a..fdfa4976b0bf 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -3034,11 +3034,11 @@ init_hw_perf_events(void) pr_info("no hardware support available\n"); } - perf_pmu_register(&pmu); + perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); return 0; } -arch_initcall(init_hw_perf_events); +early_initcall(init_hw_perf_events); /* * Callchain handling code. diff --git a/arch/arm/mach-at91/Makefile b/arch/arm/mach-at91/Makefile index 62d686f0b426..d13add71f72a 100644 --- a/arch/arm/mach-at91/Makefile +++ b/arch/arm/mach-at91/Makefile @@ -65,7 +65,7 @@ obj-$(CONFIG_MACH_AT91SAM9G20EK) += board-sam9g20ek.o obj-$(CONFIG_MACH_CPU9G20) += board-cpu9krea.o obj-$(CONFIG_MACH_STAMP9G20) += board-stamp9g20.o obj-$(CONFIG_MACH_PORTUXG20) += board-stamp9g20.o -obj-$(CONFIG_MACH_PCONTROL_G20) += board-pcontrol-g20.o +obj-$(CONFIG_MACH_PCONTROL_G20) += board-pcontrol-g20.o board-stamp9g20.o # AT91SAM9260/AT91SAM9G20 board-specific support obj-$(CONFIG_MACH_SNAPPER_9260) += board-snapper9260.o diff --git a/arch/arm/mach-at91/board-pcontrol-g20.c b/arch/arm/mach-at91/board-pcontrol-g20.c index bba5a560e02b..feb65787c30b 100644 --- a/arch/arm/mach-at91/board-pcontrol-g20.c +++ b/arch/arm/mach-at91/board-pcontrol-g20.c @@ -31,6 +31,7 @@ #include <mach/board.h> #include <mach/at91sam9_smc.h> +#include <mach/stamp9g20.h> #include "sam9_smc.h" #include "generic.h" @@ -38,11 +39,7 @@ static void __init pcontrol_g20_map_io(void) { - /* Initialize processor: 18.432 MHz crystal */ - at91sam9260_initialize(18432000); - - /* DGBU on ttyS0. (Rx, Tx) only TTL -> JTAG connector X7 17,19 ) */ - at91_register_uart(0, 0, 0); + stamp9g20_map_io(); /* USART0 on ttyS1. 
(Rx, Tx, CTS, RTS) piggyback A2 */ at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS @@ -54,9 +51,6 @@ static void __init pcontrol_g20_map_io(void) /* USART2 on ttyS3. (Rx, Tx) 9bit-Bus Multidrop-mode X4 */ at91_register_uart(AT91SAM9260_ID_US4, 3, 0); - - /* set serial console to ttyS0 (ie, DBGU) */ - at91_set_serial_console(0); } @@ -66,38 +60,6 @@ static void __init init_irq(void) } -/* - * NAND flash 512MiB 1,8V 8-bit, sector size 128 KiB - */ -static struct atmel_nand_data __initdata nand_data = { - .ale = 21, - .cle = 22, - .rdy_pin = AT91_PIN_PC13, - .enable_pin = AT91_PIN_PC14, -}; - -/* - * Bus timings; unit = 7.57ns - */ -static struct sam9_smc_config __initdata nand_smc_config = { - .ncs_read_setup = 0, - .nrd_setup = 2, - .ncs_write_setup = 0, - .nwe_setup = 2, - - .ncs_read_pulse = 4, - .nrd_pulse = 4, - .ncs_write_pulse = 4, - .nwe_pulse = 4, - - .read_cycle = 7, - .write_cycle = 7, - - .mode = AT91_SMC_READMODE | AT91_SMC_WRITEMODE - | AT91_SMC_EXNWMODE_DISABLE | AT91_SMC_DBW_8, - .tdf_cycles = 3, -}; - static struct sam9_smc_config __initdata pcontrol_smc_config[2] = { { .ncs_read_setup = 16, .nrd_setup = 18, @@ -138,14 +100,6 @@ static struct sam9_smc_config __initdata pcontrol_smc_config[2] = { { .tdf_cycles = 1, } }; -static void __init add_device_nand(void) -{ - /* configure chip-select 3 (NAND) */ - sam9_smc_configure(3, &nand_smc_config); - at91_add_device_nand(&nand_data); -} - - static void __init add_device_pcontrol(void) { /* configure chip-select 4 (IO compatible to 8051 X4 ) */ @@ -156,23 +110,6 @@ static void __init add_device_pcontrol(void) /* - * MCI (SD/MMC) - * det_pin, wp_pin and vcc_pin are not connected - */ -#if defined(CONFIG_MMC_ATMELMCI) || defined(CONFIG_MMC_ATMELMCI_MODULE) -static struct mci_platform_data __initdata mmc_data = { - .slot[0] = { - .bus_width = 4, - }, -}; -#else -static struct at91_mmc_data __initdata mmc_data = { - .wire4 = 1, -}; -#endif - - -/* * USB Host port */ static struct at91_usbh_data __initdata usbh_data = { @@ -265,42 +202,13 @@ static struct spi_board_info pcontrol_g20_spi_devices[] = { }; -/* - * Dallas 1-Wire DS2431 - */ -static struct w1_gpio_platform_data w1_gpio_pdata = { - .pin = AT91_PIN_PA29, - .is_open_drain = 1, -}; - -static struct platform_device w1_device = { - .name = "w1-gpio", - .id = -1, - .dev.platform_data = &w1_gpio_pdata, -}; - -static void add_wire1(void) -{ - at91_set_GPIO_periph(w1_gpio_pdata.pin, 1); - at91_set_multi_drive(w1_gpio_pdata.pin, 1); - platform_device_register(&w1_device); -} - - static void __init pcontrol_g20_board_init(void) { - at91_add_device_serial(); - add_device_nand(); -#if defined(CONFIG_MMC_ATMELMCI) || defined(CONFIG_MMC_ATMELMCI_MODULE) - at91_add_device_mci(0, &mmc_data); -#else - at91_add_device_mmc(0, &mmc_data); -#endif + stamp9g20_board_init(); at91_add_device_usbh(&usbh_data); at91_add_device_eth(&macb_data); at91_add_device_i2c(pcontrol_g20_i2c_devices, ARRAY_SIZE(pcontrol_g20_i2c_devices)); - add_wire1(); add_device_pcontrol(); at91_add_device_spi(pcontrol_g20_spi_devices, ARRAY_SIZE(pcontrol_g20_spi_devices)); diff --git a/arch/arm/mach-at91/board-stamp9g20.c b/arch/arm/mach-at91/board-stamp9g20.c index 5206eef4a67e..f8902b118960 100644 --- a/arch/arm/mach-at91/board-stamp9g20.c +++ b/arch/arm/mach-at91/board-stamp9g20.c @@ -32,7 +32,7 @@ #include "generic.h" -static void __init portuxg20_map_io(void) +void __init stamp9g20_map_io(void) { /* Initialize processor: 18.432 MHz crystal */ at91sam9260_initialize(18432000); @@ -40,6 +40,24 @@ static void __init 
portuxg20_map_io(void) /* DGBU on ttyS0. (Rx & Tx only) */ at91_register_uart(0, 0, 0); + /* set serial console to ttyS0 (ie, DBGU) */ + at91_set_serial_console(0); +} + +static void __init stamp9g20evb_map_io(void) +{ + stamp9g20_map_io(); + + /* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */ + at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS | ATMEL_UART_RTS + | ATMEL_UART_DTR | ATMEL_UART_DSR + | ATMEL_UART_DCD | ATMEL_UART_RI); +} + +static void __init portuxg20_map_io(void) +{ + stamp9g20_map_io(); + /* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */ at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS | ATMEL_UART_RTS | ATMEL_UART_DTR | ATMEL_UART_DSR @@ -56,26 +74,6 @@ static void __init portuxg20_map_io(void) /* USART5 on ttyS6. (Rx, Tx only) */ at91_register_uart(AT91SAM9260_ID_US5, 6, 0); - - /* set serial console to ttyS0 (ie, DBGU) */ - at91_set_serial_console(0); -} - -static void __init stamp9g20_map_io(void) -{ - /* Initialize processor: 18.432 MHz crystal */ - at91sam9260_initialize(18432000); - - /* DGBU on ttyS0. (Rx & Tx only) */ - at91_register_uart(0, 0, 0); - - /* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */ - at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS | ATMEL_UART_RTS - | ATMEL_UART_DTR | ATMEL_UART_DSR - | ATMEL_UART_DCD | ATMEL_UART_RI); - - /* set serial console to ttyS0 (ie, DBGU) */ - at91_set_serial_console(0); } static void __init init_irq(void) @@ -156,7 +154,7 @@ static struct at91_udc_data __initdata portuxg20_udc_data = { .pullup_pin = 0, /* pull-up driven by UDC */ }; -static struct at91_udc_data __initdata stamp9g20_udc_data = { +static struct at91_udc_data __initdata stamp9g20evb_udc_data = { .vbus_pin = AT91_PIN_PA22, .pullup_pin = 0, /* pull-up driven by UDC */ }; @@ -190,7 +188,7 @@ static struct gpio_led portuxg20_leds[] = { } }; -static struct gpio_led stamp9g20_leds[] = { +static struct gpio_led stamp9g20evb_leds[] = { { .name = "D8", .gpio = AT91_PIN_PB18, @@ -250,7 +248,7 @@ void add_w1(void) } -static void __init generic_board_init(void) +void __init stamp9g20_board_init(void) { /* Serial */ at91_add_device_serial(); @@ -262,34 +260,40 @@ static void __init generic_board_init(void) #else at91_add_device_mmc(0, &mmc_data); #endif - /* USB Host */ - at91_add_device_usbh(&usbh_data); - /* Ethernet */ - at91_add_device_eth(&macb_data); - /* I2C */ - at91_add_device_i2c(NULL, 0); /* W1 */ add_w1(); } static void __init portuxg20_board_init(void) { - generic_board_init(); - /* SPI */ - at91_add_device_spi(portuxg20_spi_devices, ARRAY_SIZE(portuxg20_spi_devices)); + stamp9g20_board_init(); + /* USB Host */ + at91_add_device_usbh(&usbh_data); /* USB Device */ at91_add_device_udc(&portuxg20_udc_data); + /* Ethernet */ + at91_add_device_eth(&macb_data); + /* I2C */ + at91_add_device_i2c(NULL, 0); + /* SPI */ + at91_add_device_spi(portuxg20_spi_devices, ARRAY_SIZE(portuxg20_spi_devices)); /* LEDs */ at91_gpio_leds(portuxg20_leds, ARRAY_SIZE(portuxg20_leds)); } -static void __init stamp9g20_board_init(void) +static void __init stamp9g20evb_board_init(void) { - generic_board_init(); + stamp9g20_board_init(); + /* USB Host */ + at91_add_device_usbh(&usbh_data); /* USB Device */ - at91_add_device_udc(&stamp9g20_udc_data); + at91_add_device_udc(&stamp9g20evb_udc_data); + /* Ethernet */ + at91_add_device_eth(&macb_data); + /* I2C */ + at91_add_device_i2c(NULL, 0); /* LEDs */ - at91_gpio_leds(stamp9g20_leds, ARRAY_SIZE(stamp9g20_leds)); + at91_gpio_leds(stamp9g20evb_leds, ARRAY_SIZE(stamp9g20evb_leds)); 
} MACHINE_START(PORTUXG20, "taskit PortuxG20") @@ -305,7 +309,7 @@ MACHINE_START(STAMP9G20, "taskit Stamp9G20") /* Maintainer: taskit GmbH */ .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = stamp9g20_map_io, + .map_io = stamp9g20evb_map_io, .init_irq = init_irq, - .init_machine = stamp9g20_board_init, + .init_machine = stamp9g20evb_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/clock.c b/arch/arm/mach-at91/clock.c index 7525cee3983f..9113da6845f1 100644 --- a/arch/arm/mach-at91/clock.c +++ b/arch/arm/mach-at91/clock.c @@ -658,7 +658,7 @@ static void __init at91_upll_usbfs_clock_init(unsigned long main_clock) /* Now set uhpck values */ uhpck.parent = &utmi_clk; uhpck.pmc_mask = AT91SAM926x_PMC_UHP; - uhpck.rate_hz = utmi_clk.parent->rate_hz; + uhpck.rate_hz = utmi_clk.rate_hz; uhpck.rate_hz /= 1 + ((at91_sys_read(AT91_PMC_USB) & AT91_PMC_OHCIUSBDIV) >> 8); } diff --git a/arch/arm/mach-at91/include/mach/stamp9g20.h b/arch/arm/mach-at91/include/mach/stamp9g20.h new file mode 100644 index 000000000000..6120f9c46d59 --- /dev/null +++ b/arch/arm/mach-at91/include/mach/stamp9g20.h @@ -0,0 +1,7 @@ +#ifndef __MACH_STAMP9G20_H +#define __MACH_STAMP9G20_H + +void stamp9g20_map_io(void); +void stamp9g20_board_init(void); + +#endif diff --git a/arch/arm/mach-s3c2412/Kconfig b/arch/arm/mach-s3c2412/Kconfig index fa2e5bffbb8e..6983cb4d4cae 100644 --- a/arch/arm/mach-s3c2412/Kconfig +++ b/arch/arm/mach-s3c2412/Kconfig @@ -28,9 +28,16 @@ config S3C2412_DMA config S3C2412_PM bool + select S3C2412_PM_SLEEP help Internal config node to apply S3C2412 power management +config S3C2412_PM_SLEEP + bool + help + Internal config node to apply sleep for S3C2412 power management. + Can be selected by another SoCs with similar sleep procedure. 
+ # Note, the S3C2412 IOtiming support is in plat-s3c24xx config S3C2412_CPUFREQ diff --git a/arch/arm/mach-s3c2412/Makefile b/arch/arm/mach-s3c2412/Makefile index 530ec46cbaea..6c48a91ea39e 100644 --- a/arch/arm/mach-s3c2412/Makefile +++ b/arch/arm/mach-s3c2412/Makefile @@ -14,7 +14,8 @@ obj-$(CONFIG_CPU_S3C2412) += irq.o obj-$(CONFIG_CPU_S3C2412) += clock.o obj-$(CONFIG_CPU_S3C2412) += gpio.o obj-$(CONFIG_S3C2412_DMA) += dma.o -obj-$(CONFIG_S3C2412_PM) += pm.o sleep.o +obj-$(CONFIG_S3C2412_PM) += pm.o +obj-$(CONFIG_S3C2412_PM_SLEEP) += sleep.o obj-$(CONFIG_S3C2412_CPUFREQ) += cpu-freq.o # Machine support diff --git a/arch/arm/mach-s3c2416/Kconfig b/arch/arm/mach-s3c2416/Kconfig index 27b3e7c9d613..df8d14974c90 100644 --- a/arch/arm/mach-s3c2416/Kconfig +++ b/arch/arm/mach-s3c2416/Kconfig @@ -27,6 +27,7 @@ config S3C2416_DMA config S3C2416_PM bool + select S3C2412_PM_SLEEP help Internal config node to apply S3C2416 power management diff --git a/arch/arm/mach-s5pv210/mach-aquila.c b/arch/arm/mach-s5pv210/mach-aquila.c index 28677caf3613..461aa035afc0 100644 --- a/arch/arm/mach-s5pv210/mach-aquila.c +++ b/arch/arm/mach-s5pv210/mach-aquila.c @@ -378,6 +378,12 @@ static struct max8998_regulator_data aquila_regulators[] = { static struct max8998_platform_data aquila_max8998_pdata = { .num_regulators = ARRAY_SIZE(aquila_regulators), .regulators = aquila_regulators, + .buck1_set1 = S5PV210_GPH0(3), + .buck1_set2 = S5PV210_GPH0(4), + .buck2_set3 = S5PV210_GPH0(5), + .buck1_max_voltage1 = 1200000, + .buck1_max_voltage2 = 1200000, + .buck2_max_voltage = 1200000, }; #endif diff --git a/arch/arm/mach-s5pv210/mach-goni.c b/arch/arm/mach-s5pv210/mach-goni.c index b1dcf964a768..e22d5112fd44 100644 --- a/arch/arm/mach-s5pv210/mach-goni.c +++ b/arch/arm/mach-s5pv210/mach-goni.c @@ -518,6 +518,12 @@ static struct max8998_regulator_data goni_regulators[] = { static struct max8998_platform_data goni_max8998_pdata = { .num_regulators = ARRAY_SIZE(goni_regulators), .regulators = goni_regulators, + .buck1_set1 = S5PV210_GPH0(3), + .buck1_set2 = S5PV210_GPH0(4), + .buck2_set3 = S5PV210_GPH0(5), + .buck1_max_voltage1 = 1200000, + .buck1_max_voltage2 = 1200000, + .buck2_max_voltage = 1200000, }; #endif diff --git a/arch/arm/mach-shmobile/include/mach/entry-macro.S b/arch/arm/mach-shmobile/include/mach/entry-macro.S index a285d13c7416..f428c4db2b60 100644 --- a/arch/arm/mach-shmobile/include/mach/entry-macro.S +++ b/arch/arm/mach-shmobile/include/mach/entry-macro.S @@ -1,4 +1,5 @@ /* + * Copyright (C) 2010 Magnus Damm * Copyright (C) 2008 Renesas Solutions Corp. * * This program is free software; you can redistribute it and/or modify @@ -14,24 +15,45 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include <mach/hardware.h> #include <mach/irqs.h> +#define INTCA_BASE 0xe6980000 +#define INTFLGA_OFFS 0x00000018 /* accept pending interrupt */ +#define INTEVTA_OFFS 0x00000020 /* vector number of accepted interrupt */ +#define INTLVLA_OFFS 0x00000030 /* priority level of accepted interrupt */ +#define INTLVLB_OFFS 0x00000034 /* previous priority level */ + .macro disable_fiq .endm .macro get_irqnr_preamble, base, tmp - ldr \base, =INTFLGA + ldr \base, =INTCA_BASE .endm .macro arch_ret_to_user, tmp1, tmp2 .endm .macro get_irqnr_and_base, irqnr, irqstat, base, tmp - ldr \irqnr, [\base] + /* The single INTFLGA read access below results in the following: + * + * 1. INTLVLB is updated with old priority value from INTLVLA + * 2. 
Highest priority interrupt is accepted + * 3. INTLVLA is updated to contain priority of accepted interrupt + * 4. Accepted interrupt vector is stored in INTFLGA and INTEVTA + */ + ldr \irqnr, [\base, #INTFLGA_OFFS] + + /* Restore INTLVLA with the value saved in INTLVLB. + * This is required to support interrupt priorities properly. + */ + ldrb \tmp, [\base, #INTLVLB_OFFS] + strb \tmp, [\base, #INTLVLA_OFFS] + + /* Handle invalid vector number case */ cmp \irqnr, #0 beq 1000f - /* intevt to irq number */ + + /* Convert vector to irq number, same as the evt2irq() macro */ lsr \irqnr, \irqnr, #0x5 subs \irqnr, \irqnr, #16 diff --git a/arch/arm/mach-shmobile/include/mach/vmalloc.h b/arch/arm/mach-shmobile/include/mach/vmalloc.h index 4aecf6e3a859..2b8fd8b942fe 100644 --- a/arch/arm/mach-shmobile/include/mach/vmalloc.h +++ b/arch/arm/mach-shmobile/include/mach/vmalloc.h @@ -2,6 +2,6 @@ #define __ASM_MACH_VMALLOC_H /* Vmalloc at ... - 0xe5ffffff */ -#define VMALLOC_END 0xe6000000 +#define VMALLOC_END 0xe6000000UL #endif /* __ASM_MACH_VMALLOC_H */ diff --git a/arch/arm/plat-s3c24xx/Kconfig b/arch/arm/plat-s3c24xx/Kconfig index 5a27b1b538f2..eb105e61c746 100644 --- a/arch/arm/plat-s3c24xx/Kconfig +++ b/arch/arm/plat-s3c24xx/Kconfig @@ -8,7 +8,7 @@ config PLAT_S3C24XX default y select NO_IOPORT select ARCH_REQUIRE_GPIOLIB - select S3C_DEVICE_NAND + select S3C_DEV_NAND select S3C_GPIO_CFG_S3C24XX help Base platform code for any Samsung S3C24XX device diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 67a2fa2caa49..0a9b5b8b2a19 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -19,6 +19,8 @@ config MIPS select GENERIC_ATOMIC64 if !64BIT select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG + select HAVE_GENERIC_HARDIRQS + select GENERIC_IRQ_PROBE menu "Machine selection" @@ -1664,6 +1666,28 @@ config PAGE_SIZE_64KB endchoice +config FORCE_MAX_ZONEORDER + int "Maximum zone order" + range 13 64 if SYS_SUPPORTS_HUGETLBFS && PAGE_SIZE_32KB + default "13" if SYS_SUPPORTS_HUGETLBFS && PAGE_SIZE_32KB + range 12 64 if SYS_SUPPORTS_HUGETLBFS && PAGE_SIZE_16KB + default "12" if SYS_SUPPORTS_HUGETLBFS && PAGE_SIZE_16KB + range 11 64 + default "11" + help + The kernel memory allocator divides physically contiguous memory + blocks into "zones", where each zone is a power of two number of + pages. This option selects the largest power of two that the kernel + keeps in the memory allocator. If you need to allocate very large + blocks of physically contiguous memory, then you may need to + increase this value. + + This config option is actually maximum order plus one. For example, + a value of 11 means that the largest free memory block is 2^10 pages. + + The page size is not necessarily 4KB. Keep this in mind + when choosing a value for this option. + config BOARD_SCACHE bool @@ -1922,20 +1946,6 @@ config CPU_R4400_WORKAROUNDS bool # -# Use the generic interrupt handling code in kernel/irq/: -# -config GENERIC_HARDIRQS - bool - default y - -config GENERIC_IRQ_PROBE - bool - default y - -config IRQ_PER_CPU - bool - -# # - Highmem only makes sense for the 32-bit kernel. 
# - The current highmem code will only work properly on physically indexed # caches such as R3000, SB1, R7000 or those that look like they're virtually diff --git a/arch/mips/alchemy/common/platform.c b/arch/mips/alchemy/common/platform.c index 3691630931d6..9e7814db3d03 100644 --- a/arch/mips/alchemy/common/platform.c +++ b/arch/mips/alchemy/common/platform.c @@ -27,6 +27,7 @@ static void alchemy_8250_pm(struct uart_port *port, unsigned int state, unsigned int old_state) { +#ifdef CONFIG_SERIAL_8250 switch (state) { case 0: if ((__raw_readl(port->membase + UART_MOD_CNTRL) & 3) != 3) { @@ -49,6 +50,7 @@ static void alchemy_8250_pm(struct uart_port *port, unsigned int state, serial8250_do_pm(port, state, old_state); break; } +#endif } #define PORT(_base, _irq) \ diff --git a/arch/mips/alchemy/devboards/prom.c b/arch/mips/alchemy/devboards/prom.c index b30df5c97ad3..baeb21385058 100644 --- a/arch/mips/alchemy/devboards/prom.c +++ b/arch/mips/alchemy/devboards/prom.c @@ -54,10 +54,9 @@ void __init prom_init(void) prom_init_cmdline(); memsize_str = prom_getenv("memsize"); - if (!memsize_str) + if (!memsize_str || strict_strtoul(memsize_str, 0, &memsize)) memsize = ALCHEMY_BOARD_DEFAULT_MEMSIZE; - else - strict_strtoul(memsize_str, 0, &memsize); + add_memory_region(0, memsize, BOOT_MEM_RAM); } diff --git a/arch/mips/ar7/clock.c b/arch/mips/ar7/clock.c index fc0e7154e8d6..2ca4ada1c291 100644 --- a/arch/mips/ar7/clock.c +++ b/arch/mips/ar7/clock.c @@ -239,12 +239,12 @@ static void tnetd7300_set_clock(u32 shift, struct tnetd7300_clock *clock, calculate(base_clock, frequency, &prediv, &postdiv, &mul); writel(((prediv - 1) << PREDIV_SHIFT) | (postdiv - 1), &clock->ctrl); - msleep(1); + mdelay(1); writel(4, &clock->pll); while (readl(&clock->pll) & PLL_STATUS) ; writel(((mul - 1) << MUL_SHIFT) | (0xff << 3) | 0x0e, &clock->pll); - msleep(75); + mdelay(75); } static void __init tnetd7300_init_clocks(void) @@ -456,7 +456,7 @@ void clk_put(struct clk *clk) } EXPORT_SYMBOL(clk_put); -int __init ar7_init_clocks(void) +void __init ar7_init_clocks(void) { switch (ar7_chip_id()) { case AR7_CHIP_7100: @@ -472,7 +472,4 @@ int __init ar7_init_clocks(void) } /* adjust vbus clock rate */ vbus_clk.rate = bus_clk.rate / 2; - - return 0; } -arch_initcall(ar7_init_clocks); diff --git a/arch/mips/ar7/time.c b/arch/mips/ar7/time.c index 5fb8a0134085..22c93213b233 100644 --- a/arch/mips/ar7/time.c +++ b/arch/mips/ar7/time.c @@ -30,6 +30,9 @@ void __init plat_time_init(void) { struct clk *cpu_clk; + /* Initialize ar7 clocks so the CPU clock frequency is correct */ + ar7_init_clocks(); + cpu_clk = clk_get(NULL, "cpu"); if (IS_ERR(cpu_clk)) { printk(KERN_ERR "unable to get cpu clock\n"); diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index b1aee33efd11..c95f90bf734c 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -32,7 +32,6 @@ #include <asm/reboot.h> #include <asm/time.h> #include <bcm47xx.h> -#include <asm/fw/cfe/cfe_api.h> #include <asm/mach-bcm47xx/nvram.h> struct ssb_bus ssb_bcm47xx; @@ -57,68 +56,112 @@ static void bcm47xx_machine_halt(void) cpu_relax(); } -static void str2eaddr(char *str, char *dest) -{ - int i = 0; +#define READ_FROM_NVRAM(_outvar, name, buf) \ + if (nvram_getenv(name, buf, sizeof(buf)) >= 0)\ + sprom->_outvar = simple_strtoul(buf, NULL, 0); - if (str == NULL) { - memset(dest, 0, 6); - return; +static void bcm47xx_fill_sprom(struct ssb_sprom *sprom) +{ + char buf[100]; + u32 boardflags; + + memset(sprom, 0, sizeof(struct ssb_sprom)); + + sprom->revision 
= 1; /* Fallback: Old hardware does not define this. */ + READ_FROM_NVRAM(revision, "sromrev", buf); + if (nvram_getenv("il0macaddr", buf, sizeof(buf)) >= 0) + nvram_parse_macaddr(buf, sprom->il0mac); + if (nvram_getenv("et0macaddr", buf, sizeof(buf)) >= 0) + nvram_parse_macaddr(buf, sprom->et0mac); + if (nvram_getenv("et1macaddr", buf, sizeof(buf)) >= 0) + nvram_parse_macaddr(buf, sprom->et1mac); + READ_FROM_NVRAM(et0phyaddr, "et0phyaddr", buf); + READ_FROM_NVRAM(et1phyaddr, "et1phyaddr", buf); + READ_FROM_NVRAM(et0mdcport, "et0mdcport", buf); + READ_FROM_NVRAM(et1mdcport, "et1mdcport", buf); + READ_FROM_NVRAM(board_rev, "boardrev", buf); + READ_FROM_NVRAM(country_code, "ccode", buf); + READ_FROM_NVRAM(ant_available_a, "aa5g", buf); + READ_FROM_NVRAM(ant_available_bg, "aa2g", buf); + READ_FROM_NVRAM(pa0b0, "pa0b0", buf); + READ_FROM_NVRAM(pa0b1, "pa0b1", buf); + READ_FROM_NVRAM(pa0b2, "pa0b2", buf); + READ_FROM_NVRAM(pa1b0, "pa1b0", buf); + READ_FROM_NVRAM(pa1b1, "pa1b1", buf); + READ_FROM_NVRAM(pa1b2, "pa1b2", buf); + READ_FROM_NVRAM(pa1lob0, "pa1lob0", buf); + READ_FROM_NVRAM(pa1lob2, "pa1lob1", buf); + READ_FROM_NVRAM(pa1lob1, "pa1lob2", buf); + READ_FROM_NVRAM(pa1hib0, "pa1hib0", buf); + READ_FROM_NVRAM(pa1hib2, "pa1hib1", buf); + READ_FROM_NVRAM(pa1hib1, "pa1hib2", buf); + READ_FROM_NVRAM(gpio0, "wl0gpio0", buf); + READ_FROM_NVRAM(gpio1, "wl0gpio1", buf); + READ_FROM_NVRAM(gpio2, "wl0gpio2", buf); + READ_FROM_NVRAM(gpio3, "wl0gpio3", buf); + READ_FROM_NVRAM(maxpwr_bg, "pa0maxpwr", buf); + READ_FROM_NVRAM(maxpwr_al, "pa1lomaxpwr", buf); + READ_FROM_NVRAM(maxpwr_a, "pa1maxpwr", buf); + READ_FROM_NVRAM(maxpwr_ah, "pa1himaxpwr", buf); + READ_FROM_NVRAM(itssi_a, "pa1itssit", buf); + READ_FROM_NVRAM(itssi_bg, "pa0itssit", buf); + READ_FROM_NVRAM(tri2g, "tri2g", buf); + READ_FROM_NVRAM(tri5gl, "tri5gl", buf); + READ_FROM_NVRAM(tri5g, "tri5g", buf); + READ_FROM_NVRAM(tri5gh, "tri5gh", buf); + READ_FROM_NVRAM(rxpo2g, "rxpo2g", buf); + READ_FROM_NVRAM(rxpo5g, "rxpo5g", buf); + READ_FROM_NVRAM(rssisav2g, "rssisav2g", buf); + READ_FROM_NVRAM(rssismc2g, "rssismc2g", buf); + READ_FROM_NVRAM(rssismf2g, "rssismf2g", buf); + READ_FROM_NVRAM(bxa2g, "bxa2g", buf); + READ_FROM_NVRAM(rssisav5g, "rssisav5g", buf); + READ_FROM_NVRAM(rssismc5g, "rssismc5g", buf); + READ_FROM_NVRAM(rssismf5g, "rssismf5g", buf); + READ_FROM_NVRAM(bxa5g, "bxa5g", buf); + READ_FROM_NVRAM(cck2gpo, "cck2gpo", buf); + READ_FROM_NVRAM(ofdm2gpo, "ofdm2gpo", buf); + READ_FROM_NVRAM(ofdm5glpo, "ofdm5glpo", buf); + READ_FROM_NVRAM(ofdm5gpo, "ofdm5gpo", buf); + READ_FROM_NVRAM(ofdm5ghpo, "ofdm5ghpo", buf); + + if (nvram_getenv("boardflags", buf, sizeof(buf)) >= 0) { + boardflags = simple_strtoul(buf, NULL, 0); + if (boardflags) { + sprom->boardflags_lo = (boardflags & 0x0000FFFFU); + sprom->boardflags_hi = (boardflags & 0xFFFF0000U) >> 16; + } } - - for (;;) { - dest[i++] = (char) simple_strtoul(str, NULL, 16); - str += 2; - if (!*str++ || i == 6) - break; + if (nvram_getenv("boardflags2", buf, sizeof(buf)) >= 0) { + boardflags = simple_strtoul(buf, NULL, 0); + if (boardflags) { + sprom->boardflags2_lo = (boardflags & 0x0000FFFFU); + sprom->boardflags2_hi = (boardflags & 0xFFFF0000U) >> 16; + } } } static int bcm47xx_get_invariants(struct ssb_bus *bus, struct ssb_init_invariants *iv) { - char buf[100]; + char buf[20]; /* Fill boardinfo structure */ memset(&(iv->boardinfo), 0 , sizeof(struct ssb_boardinfo)); - if (cfe_getenv("boardvendor", buf, sizeof(buf)) >= 0 || - nvram_getenv("boardvendor", buf, sizeof(buf)) >= 0) - 
iv->boardinfo.type = (u16)simple_strtoul(buf, NULL, 0); - if (cfe_getenv("boardtype", buf, sizeof(buf)) >= 0 || - nvram_getenv("boardtype", buf, sizeof(buf)) >= 0) + if (nvram_getenv("boardvendor", buf, sizeof(buf)) >= 0) + iv->boardinfo.vendor = (u16)simple_strtoul(buf, NULL, 0); + else + iv->boardinfo.vendor = SSB_BOARDVENDOR_BCM; + if (nvram_getenv("boardtype", buf, sizeof(buf)) >= 0) iv->boardinfo.type = (u16)simple_strtoul(buf, NULL, 0); - if (cfe_getenv("boardrev", buf, sizeof(buf)) >= 0 || - nvram_getenv("boardrev", buf, sizeof(buf)) >= 0) + if (nvram_getenv("boardrev", buf, sizeof(buf)) >= 0) iv->boardinfo.rev = (u16)simple_strtoul(buf, NULL, 0); - /* Fill sprom structure */ - memset(&(iv->sprom), 0, sizeof(struct ssb_sprom)); - iv->sprom.revision = 3; - - if (cfe_getenv("et0macaddr", buf, sizeof(buf)) >= 0 || - nvram_getenv("et0macaddr", buf, sizeof(buf)) >= 0) - str2eaddr(buf, iv->sprom.et0mac); + bcm47xx_fill_sprom(&iv->sprom); - if (cfe_getenv("et1macaddr", buf, sizeof(buf)) >= 0 || - nvram_getenv("et1macaddr", buf, sizeof(buf)) >= 0) - str2eaddr(buf, iv->sprom.et1mac); - - if (cfe_getenv("et0phyaddr", buf, sizeof(buf)) >= 0 || - nvram_getenv("et0phyaddr", buf, sizeof(buf)) >= 0) - iv->sprom.et0phyaddr = simple_strtoul(buf, NULL, 0); - - if (cfe_getenv("et1phyaddr", buf, sizeof(buf)) >= 0 || - nvram_getenv("et1phyaddr", buf, sizeof(buf)) >= 0) - iv->sprom.et1phyaddr = simple_strtoul(buf, NULL, 0); - - if (cfe_getenv("et0mdcport", buf, sizeof(buf)) >= 0 || - nvram_getenv("et0mdcport", buf, sizeof(buf)) >= 0) - iv->sprom.et0mdcport = simple_strtoul(buf, NULL, 10); - - if (cfe_getenv("et1mdcport", buf, sizeof(buf)) >= 0 || - nvram_getenv("et1mdcport", buf, sizeof(buf)) >= 0) - iv->sprom.et1mdcport = simple_strtoul(buf, NULL, 10); + if (nvram_getenv("cardbus", buf, sizeof(buf)) >= 0) + iv->has_cardbus_slot = !!simple_strtoul(buf, NULL, 10); return 0; } @@ -126,12 +169,28 @@ static int bcm47xx_get_invariants(struct ssb_bus *bus, void __init plat_mem_setup(void) { int err; + char buf[100]; + struct ssb_mipscore *mcore; err = ssb_bus_ssbbus_register(&ssb_bcm47xx, SSB_ENUM_BASE, bcm47xx_get_invariants); if (err) panic("Failed to initialize SSB bus (err %d)\n", err); + mcore = &ssb_bcm47xx.mipscore; + if (nvram_getenv("kernel_args", buf, sizeof(buf)) >= 0) { + if (strstr(buf, "console=ttyS1")) { + struct ssb_serial_port port; + + printk(KERN_DEBUG "Swapping serial ports!\n"); + /* swap serial ports */ + memcpy(&port, &mcore->serial_ports[0], sizeof(port)); + memcpy(&mcore->serial_ports[0], &mcore->serial_ports[1], + sizeof(port)); + memcpy(&mcore->serial_ports[1], &port, sizeof(port)); + } + } + _machine_restart = bcm47xx_machine_restart; _machine_halt = bcm47xx_machine_halt; pm_power_off = bcm47xx_machine_halt; diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index 06d59dcbe243..86877539c6e8 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -111,8 +111,8 @@ * These are the PRID's for when 23:16 == PRID_COMP_BROADCOM */ -#define PRID_IMP_BMIPS4KC 0x4000 -#define PRID_IMP_BMIPS32 0x8000 +#define PRID_IMP_BMIPS32_REV4 0x4000 +#define PRID_IMP_BMIPS32_REV8 0x8000 #define PRID_IMP_BMIPS3300 0x9000 #define PRID_IMP_BMIPS3300_ALT 0x9100 #define PRID_IMP_BMIPS3300_BUG 0x0000 diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h index fd1d39eb7431..455c0ac7d4ea 100644 --- a/arch/mips/include/asm/elf.h +++ b/arch/mips/include/asm/elf.h @@ -249,7 +249,8 @@ extern struct mips_abi mips_abi_n32; #define SET_PERSONALITY(ex) \ do { \ - 
set_personality(PER_LINUX); \ + if (personality(current->personality) != PER_LINUX) \ + set_personality(PER_LINUX); \ \ current->thread.abi = &mips_abi; \ } while (0) @@ -296,6 +297,8 @@ do { \ #define SET_PERSONALITY(ex) \ do { \ + unsigned int p; \ + \ clear_thread_flag(TIF_32BIT_REGS); \ clear_thread_flag(TIF_32BIT_ADDR); \ \ @@ -304,7 +307,8 @@ do { \ else \ current->thread.abi = &mips_abi; \ \ - if (current->personality != PER_LINUX32) \ + p = personality(current->personality); \ + if (p != PER_LINUX32 && p != PER_LINUX) \ set_personality(PER_LINUX); \ } while (0) diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index c98bf514ec7d..5b017f23e243 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -329,10 +329,14 @@ static inline void pfx##write##bwlq(type val, \ "dsrl32 %L0, %L0, 0" "\n\t" \ "dsll32 %M0, %M0, 0" "\n\t" \ "or %L0, %L0, %M0" "\n\t" \ + ".set push" "\n\t" \ + ".set noreorder" "\n\t" \ + ".set nomacro" "\n\t" \ "sd %L0, %2" "\n\t" \ + ".set pop" "\n\t" \ ".set mips0" "\n" \ : "=r" (__tmp) \ - : "0" (__val), "m" (*__mem)); \ + : "0" (__val), "R" (*__mem)); \ if (irq) \ local_irq_restore(__flags); \ } else \ @@ -355,12 +359,16 @@ static inline type pfx##read##bwlq(const volatile void __iomem *mem) \ local_irq_save(__flags); \ __asm__ __volatile__( \ ".set mips3" "\t\t# __readq" "\n\t" \ + ".set push" "\n\t" \ + ".set noreorder" "\n\t" \ + ".set nomacro" "\n\t" \ "ld %L0, %1" "\n\t" \ + ".set pop" "\n\t" \ "dsra32 %M0, %L0, 0" "\n\t" \ "sll %L0, %L0, 0" "\n\t" \ ".set mips0" "\n" \ : "=r" (__val) \ - : "m" (*__mem)); \ + : "R" (*__mem)); \ if (irq) \ local_irq_restore(__flags); \ } else { \ diff --git a/arch/mips/include/asm/mach-ar7/ar7.h b/arch/mips/include/asm/mach-ar7/ar7.h index 7919d76186bf..07d3fadb2443 100644 --- a/arch/mips/include/asm/mach-ar7/ar7.h +++ b/arch/mips/include/asm/mach-ar7/ar7.h @@ -201,7 +201,6 @@ static inline void ar7_device_off(u32 bit) } int __init ar7_gpio_init(void); - -int __init ar7_gpio_init(void); +void __init ar7_init_clocks(void); #endif /* __AR7_H__ */ diff --git a/arch/mips/include/asm/mach-bcm47xx/nvram.h b/arch/mips/include/asm/mach-bcm47xx/nvram.h index c58ebd8bc155..9759588ba3cf 100644 --- a/arch/mips/include/asm/mach-bcm47xx/nvram.h +++ b/arch/mips/include/asm/mach-bcm47xx/nvram.h @@ -12,6 +12,7 @@ #define __NVRAM_H #include <linux/types.h> +#include <linux/kernel.h> struct nvram_header { u32 magic; @@ -36,4 +37,10 @@ struct nvram_header { extern int nvram_getenv(char *name, char *val, size_t val_len); +static inline void nvram_parse_macaddr(char *buf, u8 *macaddr) +{ + sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &macaddr[0], &macaddr[1], + &macaddr[2], &macaddr[3], &macaddr[4], &macaddr[5]); +} + #endif diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c index 5742bb4d78f4..5c0a3575877c 100644 --- a/arch/mips/jz4740/board-qi_lb60.c +++ b/arch/mips/jz4740/board-qi_lb60.c @@ -5,7 +5,7 @@ * * Copyright (c) 2009 Qi Hardware inc., * Author: Xiangfu Liu <xiangfu@qi-hardware.com> - * Copyright 2010, Lars-Petrer Clausen <lars@metafoo.de> + * Copyright 2010, Lars-Peter Clausen <lars@metafoo.de> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 or later @@ -235,7 +235,7 @@ static const unsigned int qi_lb60_keypad_rows[] = { QI_LB60_GPIO_KEYIN(3), QI_LB60_GPIO_KEYIN(4), QI_LB60_GPIO_KEYIN(5), - QI_LB60_GPIO_KEYIN(7), + QI_LB60_GPIO_KEYIN(6), QI_LB60_GPIO_KEYIN8, }; diff --git 
a/arch/mips/jz4740/platform.c b/arch/mips/jz4740/platform.c index 95bc2b5b14f1..1cc9e544d16b 100644 --- a/arch/mips/jz4740/platform.c +++ b/arch/mips/jz4740/platform.c @@ -208,7 +208,7 @@ struct platform_device jz4740_i2s_device = { /* PCM */ struct platform_device jz4740_pcm_device = { - .name = "jz4740-pcm", + .name = "jz4740-pcm-audio", .id = -1, }; diff --git a/arch/mips/jz4740/prom.c b/arch/mips/jz4740/prom.c index cfeac15eb2e4..4a70407f55bb 100644 --- a/arch/mips/jz4740/prom.c +++ b/arch/mips/jz4740/prom.c @@ -23,7 +23,7 @@ #include <asm/bootinfo.h> #include <asm/mach-jz4740/base.h> -void jz4740_init_cmdline(int argc, char *argv[]) +static __init void jz4740_init_cmdline(int argc, char *argv[]) { unsigned int count = COMMAND_LINE_SIZE - 1; int i; diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c index 2f4d7a99bcc2..98c5a9737c14 100644 --- a/arch/mips/kernel/cevt-r4k.c +++ b/arch/mips/kernel/cevt-r4k.c @@ -32,7 +32,7 @@ static int mips_next_event(unsigned long delta, cnt = read_c0_count(); cnt += delta; write_c0_compare(cnt); - res = ((int)(read_c0_count() - cnt) > 0) ? -ETIME : 0; + res = ((int)(read_c0_count() - cnt) >= 0) ? -ETIME : 0; return res; } diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 71620e19827a..68dae7b6b5db 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -905,7 +905,8 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) { decode_configs(c); switch (c->processor_id & 0xff00) { - case PRID_IMP_BMIPS32: + case PRID_IMP_BMIPS32_REV4: + case PRID_IMP_BMIPS32_REV8: c->cputype = CPU_BMIPS32; __cpu_name[cpu] = "Broadcom BMIPS32"; break; @@ -933,10 +934,6 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "Broadcom BMIPS5000"; c->options |= MIPS_CPU_ULRI; break; - case PRID_IMP_BMIPS4KC: - c->cputype = CPU_4KC; - __cpu_name[cpu] = "MIPS 4Kc"; - break; } } diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 6343b4a5b835..876a75cc376f 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -251,14 +251,15 @@ SYSCALL_DEFINE5(n32_msgrcv, int, msqid, u32, msgp, size_t, msgsz, SYSCALL_DEFINE1(32_personality, unsigned long, personality) { + unsigned int p = personality & 0xffffffff; int ret; - personality &= 0xffffffff; + if (personality(current->personality) == PER_LINUX32 && - personality == PER_LINUX) - personality = PER_LINUX32; - ret = sys_personality(personality); - if (ret == PER_LINUX32) - ret = PER_LINUX; + personality(p) == PER_LINUX) + p = (p & ~PER_MASK) | PER_LINUX32; + ret = sys_personality(p); + if (ret != -1 && personality(ret) == PER_LINUX32) + ret = (ret & ~PER_MASK) | PER_LINUX; return ret; } diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 5c7c6fc07565..183e0d226669 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -1047,6 +1047,6 @@ init_hw_perf_events(void) return 0; } -arch_initcall(init_hw_perf_events); +early_initcall(init_hw_perf_events); #endif /* defined(CONFIG_CPU_MIPS32)... 
*/ diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 99960940d4a4..ae167df73ddd 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -142,7 +142,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childregs->regs[7] = 0; /* Clear error flag */ childregs->regs[2] = 0; /* Child gets zero as return value */ - regs->regs[2] = p->pid; if (childregs->cp0_status & ST0_CU0) { childregs->regs[28] = (unsigned long) ti; diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c index e000b278f024..9dbe58368953 100644 --- a/arch/mips/kernel/prom.c +++ b/arch/mips/kernel/prom.c @@ -100,7 +100,7 @@ void __init device_tree_init(void) return; base = virt_to_phys((void *)initial_boot_params); - size = initial_boot_params->totalsize; + size = be32_to_cpu(initial_boot_params->totalsize); /* Before we do anything, lets reserve the dt blob */ reserve_mem_mach(base, size); diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index 43e7cdc5ded2..c0e81418ba21 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c @@ -153,7 +153,7 @@ static void __cpuinit vsmp_init_secondary(void) { extern int gic_present; - /* This is Malta specific: IPI,performance and timer inetrrupts */ + /* This is Malta specific: IPI,performance and timer interrupts */ if (gic_present) change_c0_status(ST0_IM, STATUSF_IP3 | STATUSF_IP4 | STATUSF_IP6 | STATUSF_IP7); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 8e9fbe75894e..e97104302541 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -83,7 +83,8 @@ extern asmlinkage void handle_mcheck(void); extern asmlinkage void handle_reserved(void); extern int fpu_emulator_cop1Handler(struct pt_regs *xcp, - struct mips_fpu_struct *ctx, int has_fpu); + struct mips_fpu_struct *ctx, int has_fpu, + void *__user *fault_addr); void (*board_be_init)(void); int (*board_be_handler)(struct pt_regs *regs, int is_fixup); @@ -661,12 +662,36 @@ asmlinkage void do_ov(struct pt_regs *regs) force_sig_info(SIGFPE, &info, current); } +static int process_fpemu_return(int sig, void __user *fault_addr) +{ + if (sig == SIGSEGV || sig == SIGBUS) { + struct siginfo si = {0}; + si.si_addr = fault_addr; + si.si_signo = sig; + if (sig == SIGSEGV) { + if (find_vma(current->mm, (unsigned long)fault_addr)) + si.si_code = SEGV_ACCERR; + else + si.si_code = SEGV_MAPERR; + } else { + si.si_code = BUS_ADRERR; + } + force_sig_info(sig, &si, current); + return 1; + } else if (sig) { + force_sig(sig, current); + return 1; + } else { + return 0; + } +} + /* * XXX Delayed fp exceptions when doing a lazy ctx switch XXX */ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) { - siginfo_t info; + siginfo_t info = {0}; if (notify_die(DIE_FP, "FP exception", regs, 0, regs_to_trapnr(regs), SIGFPE) == NOTIFY_STOP) @@ -675,6 +700,7 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) if (fcr31 & FPU_CSR_UNI_X) { int sig; + void __user *fault_addr = NULL; /* * Unimplemented operation exception. If we've got the full @@ -690,7 +716,8 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) lose_fpu(1); /* Run the emulator */ - sig = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 1); + sig = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 1, + &fault_addr); /* * We can't allow the emulated instruction to leave any of @@ -702,8 +729,7 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) own_fpu(1); /* Using the FPU again. 
*/ /* If something went wrong, signal */ - if (sig) - force_sig(sig, current); + process_fpemu_return(sig, fault_addr); return; } else if (fcr31 & FPU_CSR_INV_X) @@ -996,11 +1022,11 @@ asmlinkage void do_cpu(struct pt_regs *regs) if (!raw_cpu_has_fpu) { int sig; + void __user *fault_addr = NULL; sig = fpu_emulator_cop1Handler(regs, - ¤t->thread.fpu, 0); - if (sig) - force_sig(sig, current); - else + ¤t->thread.fpu, + 0, &fault_addr); + if (!process_fpemu_return(sig, fault_addr)) mt_ase_fp_affinity(); } diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 3eb3cde2f661..6a1fdfef8fde 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -1092,6 +1092,10 @@ static int vpe_open(struct inode *inode, struct file *filp) /* this of-course trashes what was there before... */ v->pbuffer = vmalloc(P_SIZE); + if (!v->pbuffer) { + pr_warning("VPE loader: unable to allocate memory\n"); + return -ENOMEM; + } v->plen = P_SIZE; v->load_addr = NULL; v->len = 0; @@ -1149,10 +1153,9 @@ static int vpe_release(struct inode *inode, struct file *filp) if (ret < 0) v->shared_ptr = NULL; - // cleanup any temp buffers - if (v->pbuffer) - vfree(v->pbuffer); + vfree(v->pbuffer); v->plen = 0; + return ret; } @@ -1169,11 +1172,6 @@ static ssize_t vpe_write(struct file *file, const char __user * buffer, if (v == NULL) return -ENODEV; - if (v->pbuffer == NULL) { - printk(KERN_ERR "VPE loader: no buffer for program\n"); - return -ENOMEM; - } - if ((count + v->len) > v->plen) { printk(KERN_WARNING "VPE loader: elf size too big. Perhaps strip uneeded symbols\n"); diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 77dc3b20110a..606c8a9efe3b 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -161,16 +161,16 @@ FEXPORT(__bzero) .Lfwd_fixup: PTR_L t0, TI_TASK($28) - LONG_L t0, THREAD_BUADDR(t0) andi a2, 0x3f + LONG_L t0, THREAD_BUADDR(t0) LONG_ADDU a2, t1 jr ra LONG_SUBU a2, t0 .Lpartial_fixup: PTR_L t0, TI_TASK($28) - LONG_L t0, THREAD_BUADDR(t0) andi a2, LONGMASK + LONG_L t0, THREAD_BUADDR(t0) LONG_ADDU a2, t1 jr ra LONG_SUBU a2, t0 diff --git a/arch/mips/loongson/common/env.c b/arch/mips/loongson/common/env.c index ae4cff97a56c..11b193f848f8 100644 --- a/arch/mips/loongson/common/env.c +++ b/arch/mips/loongson/common/env.c @@ -29,9 +29,9 @@ unsigned long memsize, highmemsize; #define parse_even_earlier(res, option, p) \ do { \ + int ret; \ if (strncmp(option, (char *)p, strlen(option)) == 0) \ - strict_strtol((char *)p + strlen(option"="), \ - 10, &res); \ + ret = strict_strtol((char *)p + strlen(option"="), 10, &res); \ } while (0) void __init prom_init_env(void) diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index b2ad1b0910ff..d32cb0503110 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -64,7 +64,7 @@ static int fpu_emu(struct pt_regs *, struct mips_fpu_struct *, #if __mips >= 4 && __mips != 32 static int fpux_emu(struct pt_regs *, - struct mips_fpu_struct *, mips_instruction); + struct mips_fpu_struct *, mips_instruction, void *__user *); #endif /* Further private data for which no space exists in mips_fpu_struct */ @@ -208,16 +208,23 @@ static inline int cop1_64bit(struct pt_regs *xcp) * Two instructions if the instruction is in a branch delay slot. 
*/ -static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx) +static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx, + void *__user *fault_addr) { mips_instruction ir; unsigned long emulpc, contpc; unsigned int cond; - if (get_user(ir, (mips_instruction __user *) xcp->cp0_epc)) { + if (!access_ok(VERIFY_READ, xcp->cp0_epc, sizeof(mips_instruction))) { MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = (mips_instruction __user *)xcp->cp0_epc; return SIGBUS; } + if (__get_user(ir, (mips_instruction __user *) xcp->cp0_epc)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = (mips_instruction __user *)xcp->cp0_epc; + return SIGSEGV; + } /* XXX NEC Vr54xx bug workaround */ if ((xcp->cp0_cause & CAUSEF_BD) && !isBranchInstr(&ir)) @@ -245,10 +252,16 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx) #endif return SIGILL; } - if (get_user(ir, (mips_instruction __user *) emulpc)) { + if (!access_ok(VERIFY_READ, emulpc, sizeof(mips_instruction))) { MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = (mips_instruction __user *)emulpc; return SIGBUS; } + if (__get_user(ir, (mips_instruction __user *) emulpc)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = (mips_instruction __user *)emulpc; + return SIGSEGV; + } /* __compute_return_epc() will have updated cp0_epc */ contpc = xcp->cp0_epc; /* In order not to confuse ptrace() et al, tweak context */ @@ -269,10 +282,17 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx) u64 val; MIPS_FPU_EMU_INC_STATS(loads); - if (get_user(val, va)) { + + if (!access_ok(VERIFY_READ, va, sizeof(u64))) { MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; return SIGBUS; } + if (__get_user(val, va)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGSEGV; + } DITOREG(val, MIPSInst_RT(ir)); break; } @@ -284,10 +304,16 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx) MIPS_FPU_EMU_INC_STATS(stores); DIFROMREG(val, MIPSInst_RT(ir)); - if (put_user(val, va)) { + if (!access_ok(VERIFY_WRITE, va, sizeof(u64))) { MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; return SIGBUS; } + if (__put_user(val, va)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGSEGV; + } break; } @@ -297,10 +323,16 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx) u32 val; MIPS_FPU_EMU_INC_STATS(loads); - if (get_user(val, va)) { + if (!access_ok(VERIFY_READ, va, sizeof(u32))) { MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; return SIGBUS; } + if (__get_user(val, va)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGSEGV; + } SITOREG(val, MIPSInst_RT(ir)); break; } @@ -312,10 +344,16 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx) MIPS_FPU_EMU_INC_STATS(stores); SIFROMREG(val, MIPSInst_RT(ir)); - if (put_user(val, va)) { + if (!access_ok(VERIFY_WRITE, va, sizeof(u32))) { MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; return SIGBUS; } + if (__put_user(val, va)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGSEGV; + } break; } @@ -440,11 +478,18 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx) contpc = (xcp->cp0_epc + (MIPSInst_SIMM(ir) << 2)); - if (get_user(ir, - (mips_instruction __user *) xcp->cp0_epc)) { + if (!access_ok(VERIFY_READ, xcp->cp0_epc, + sizeof(mips_instruction))) { MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = (mips_instruction __user *)xcp->cp0_epc; return SIGBUS; } + if (__get_user(ir, + (mips_instruction __user 
*) xcp->cp0_epc)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = (mips_instruction __user *)xcp->cp0_epc; + return SIGSEGV; + } switch (MIPSInst_OPCODE(ir)) { case lwc1_op: @@ -506,9 +551,8 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx) #if __mips >= 4 && __mips != 32 case cop1x_op:{ - int sig; - - if ((sig = fpux_emu(xcp, ctx, ir))) + int sig = fpux_emu(xcp, ctx, ir, fault_addr); + if (sig) return sig; break; } @@ -604,7 +648,7 @@ DEF3OP(nmadd, dp, ieee754dp_mul, ieee754dp_add, ieee754dp_neg); DEF3OP(nmsub, dp, ieee754dp_mul, ieee754dp_sub, ieee754dp_neg); static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, - mips_instruction ir) + mips_instruction ir, void *__user *fault_addr) { unsigned rcsr = 0; /* resulting csr */ @@ -624,10 +668,16 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, xcp->regs[MIPSInst_FT(ir)]); MIPS_FPU_EMU_INC_STATS(loads); - if (get_user(val, va)) { + if (!access_ok(VERIFY_READ, va, sizeof(u32))) { MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; return SIGBUS; } + if (__get_user(val, va)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGSEGV; + } SITOREG(val, MIPSInst_FD(ir)); break; @@ -638,10 +688,16 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, MIPS_FPU_EMU_INC_STATS(stores); SIFROMREG(val, MIPSInst_FS(ir)); - if (put_user(val, va)) { + if (!access_ok(VERIFY_WRITE, va, sizeof(u32))) { MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; return SIGBUS; } + if (put_user(val, va)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGSEGV; + } break; case madd_s_op: @@ -701,10 +757,16 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, xcp->regs[MIPSInst_FT(ir)]); MIPS_FPU_EMU_INC_STATS(loads); - if (get_user(val, va)) { + if (!access_ok(VERIFY_READ, va, sizeof(u64))) { MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; return SIGBUS; } + if (__get_user(val, va)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGSEGV; + } DITOREG(val, MIPSInst_FD(ir)); break; @@ -714,10 +776,16 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, MIPS_FPU_EMU_INC_STATS(stores); DIFROMREG(val, MIPSInst_FS(ir)); - if (put_user(val, va)) { + if (!access_ok(VERIFY_WRITE, va, sizeof(u64))) { MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; return SIGBUS; } + if (__put_user(val, va)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = va; + return SIGSEGV; + } break; case madd_d_op: @@ -1242,7 +1310,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, } int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, - int has_fpu) + int has_fpu, void *__user *fault_addr) { unsigned long oldepc, prevepc; mips_instruction insn; @@ -1252,10 +1320,16 @@ int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, do { prevepc = xcp->cp0_epc; - if (get_user(insn, (mips_instruction __user *) xcp->cp0_epc)) { + if (!access_ok(VERIFY_READ, xcp->cp0_epc, sizeof(mips_instruction))) { MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = (mips_instruction __user *)xcp->cp0_epc; return SIGBUS; } + if (__get_user(insn, (mips_instruction __user *) xcp->cp0_epc)) { + MIPS_FPU_EMU_INC_STATS(errors); + *fault_addr = (mips_instruction __user *)xcp->cp0_epc; + return SIGSEGV; + } if (insn == 0) xcp->cp0_epc += 4; /* skip nops */ else { @@ -1267,7 +1341,7 @@ int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, */ /* convert to ieee library 
modes */ ieee754_csr.rm = ieee_rm[ieee754_csr.rm]; - sig = cop1Emulate(xcp, ctx); + sig = cop1Emulate(xcp, ctx, fault_addr); /* revert to mips rounding mode */ ieee754_csr.rm = mips_rm[ieee754_csr.rm]; } diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index 4fc1a0fbe007..21ea14efb837 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -288,7 +288,7 @@ int mips_dma_supported(struct device *dev, u64 mask) return plat_dma_supported(dev, mask); } -void mips_dma_cache_sync(struct device *dev, void *vaddr, size_t size, +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { BUG_ON(direction == DMA_NONE); @@ -298,6 +298,8 @@ void mips_dma_cache_sync(struct device *dev, void *vaddr, size_t size, __dma_sync((unsigned long)vaddr, size, direction); } +EXPORT_SYMBOL(dma_cache_sync); + static struct dma_map_ops mips_default_dma_map_ops = { .alloc_coherent = mips_dma_alloc_coherent, .free_coherent = mips_dma_free_coherent, diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c index 505fecad4684..9cca8de00545 100644 --- a/arch/mips/mm/sc-mips.c +++ b/arch/mips/mm/sc-mips.c @@ -68,6 +68,9 @@ static struct bcache_ops mips_sc_ops = { */ static inline int mips_sc_is_activated(struct cpuinfo_mips *c) { + unsigned int config2 = read_c0_config2(); + unsigned int tmp; + /* Check the bypass bit (L2B) */ switch (c->cputype) { case CPU_34K: @@ -83,6 +86,7 @@ static inline int mips_sc_is_activated(struct cpuinfo_mips *c) c->scache.linesz = 2 << tmp; else return 0; + return 1; } static inline int __init mips_sc_probe(void) diff --git a/arch/mips/pmc-sierra/yosemite/py-console.c b/arch/mips/pmc-sierra/yosemite/py-console.c index b7f1d9c4a8a3..434d7b1a8c6a 100644 --- a/arch/mips/pmc-sierra/yosemite/py-console.c +++ b/arch/mips/pmc-sierra/yosemite/py-console.c @@ -65,11 +65,15 @@ static unsigned char readb_outer_space(unsigned long long phys) __asm__ __volatile__ ( " .set mips3 \n" + " .set push \n" + " .set noreorder \n" + " .set nomacro \n" " ld %0, %1 \n" + " .set pop \n" " lbu %0, (%0) \n" " .set mips0 \n" : "=r" (res) - : "m" (vaddr)); + : "R" (vaddr)); write_c0_status(sr); ssnop_4(); @@ -89,11 +93,15 @@ static void writeb_outer_space(unsigned long long phys, unsigned char c) __asm__ __volatile__ ( " .set mips3 \n" + " .set push \n" + " .set noreorder \n" + " .set nomacro \n" " ld %0, %1 \n" + " .set pop \n" " sb %2, (%0) \n" " .set mips0 \n" : "=&r" (tmp) - : "m" (vaddr), "r" (c)); + : "R" (vaddr), "r" (c)); write_c0_status(sr); ssnop_4(); diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c index c308989fc464..41707a245dea 100644 --- a/arch/mips/sibyte/swarm/setup.c +++ b/arch/mips/sibyte/swarm/setup.c @@ -82,7 +82,7 @@ int swarm_be_handler(struct pt_regs *regs, int is_fixup) enum swarm_rtc_type { RTC_NONE, RTC_XICOR, - RTC_M4LT81 + RTC_M41T81, }; enum swarm_rtc_type swarm_rtc_type; @@ -96,7 +96,7 @@ void read_persistent_clock(struct timespec *ts) sec = xicor_get_time(); break; - case RTC_M4LT81: + case RTC_M41T81: sec = m41t81_get_time(); break; @@ -115,7 +115,7 @@ int rtc_mips_set_time(unsigned long sec) case RTC_XICOR: return xicor_set_time(sec); - case RTC_M4LT81: + case RTC_M41T81: return m41t81_set_time(sec); case RTC_NONE: @@ -141,7 +141,7 @@ void __init plat_mem_setup(void) if (xicor_probe()) swarm_rtc_type = RTC_XICOR; if (m41t81_probe()) - swarm_rtc_type = RTC_M4LT81; + swarm_rtc_type = RTC_M41T81; #ifdef CONFIG_VT screen_info = (struct screen_info) { diff --git 
a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c index f860a340acc9..75da468090b9 100644 --- a/arch/mn10300/kernel/time.c +++ b/arch/mn10300/kernel/time.c @@ -40,21 +40,17 @@ unsigned long long sched_clock(void) unsigned long long ll; unsigned l[2]; } tsc64, result; - unsigned long tsc, tmp; + unsigned long tmp; unsigned product[3]; /* 96-bit intermediate value */ /* cnt32_to_63() is not safe with preemption */ preempt_disable(); - /* read the TSC value - */ - tsc = get_cycles(); - - /* expand to 64-bits. + /* expand the tsc to 64-bits. * - sched_clock() must be called once a minute or better or the * following will go horribly wrong - see cnt32_to_63() */ - tsc64.ll = cnt32_to_63(tsc) & 0x7fffffffffffffffULL; + tsc64.ll = cnt32_to_63(get_cycles()) & 0x7fffffffffffffffULL; preempt_enable(); diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c index 7c07de0d8943..b150b510510f 100644 --- a/arch/powerpc/kernel/e500-pmu.c +++ b/arch/powerpc/kernel/e500-pmu.c @@ -126,4 +126,4 @@ static int init_e500_pmu(void) return register_fsl_emb_pmu(&e500_pmu); } -arch_initcall(init_e500_pmu); +early_initcall(init_e500_pmu); diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c index 09d72028f317..2cc5e0301d0b 100644 --- a/arch/powerpc/kernel/mpc7450-pmu.c +++ b/arch/powerpc/kernel/mpc7450-pmu.c @@ -414,4 +414,4 @@ static int init_mpc7450_pmu(void) return register_power_pmu(&mpc7450_pmu); } -arch_initcall(init_mpc7450_pmu); +early_initcall(init_mpc7450_pmu); diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 3129c855933c..567480705789 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1379,7 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ - perf_pmu_register(&power_pmu); + perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW); perf_cpu_notifier(power_pmu_notifier); return 0; diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 7ecca59ddf77..4dcf5f831e9d 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -681,7 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) pr_info("%s performance monitor hardware support registered\n", pmu->name); - perf_pmu_register(&fsl_emb_pmu); + perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW); return 0; } diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 2a361cdda635..ead8b3c2649e 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -613,4 +613,4 @@ static int init_power4_pmu(void) return register_power_pmu(&power4_pmu); } -arch_initcall(init_power4_pmu); +early_initcall(init_power4_pmu); diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 199de527d411..eca0ac595cb6 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -682,4 +682,4 @@ static int init_power5p_pmu(void) return register_power_pmu(&power5p_pmu); } -arch_initcall(init_power5p_pmu); +early_initcall(init_power5p_pmu); diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 98b6a729a9dd..d5ff0f64a5e6 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -621,4 +621,4 @@ static int init_power5_pmu(void) return register_power_pmu(&power5_pmu); } -arch_initcall(init_power5_pmu); 
+early_initcall(init_power5_pmu); diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index 84a607bda8fb..31603927e376 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -544,4 +544,4 @@ static int init_power6_pmu(void) return register_power_pmu(&power6_pmu); } -arch_initcall(init_power6_pmu); +early_initcall(init_power6_pmu); diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 852f7b7f6b40..593740fcb799 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -369,4 +369,4 @@ static int init_power7_pmu(void) return register_power_pmu(&power7_pmu); } -arch_initcall(init_power7_pmu); +early_initcall(init_power7_pmu); diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 3fee685de4df..9a6e093858fe 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -494,4 +494,4 @@ static int init_ppc970_pmu(void) return register_power_pmu(&ppc970_pmu); } -arch_initcall(init_ppc970_pmu); +early_initcall(init_ppc970_pmu); diff --git a/arch/sh/kernel/cpu/sh4/perf_event.c b/arch/sh/kernel/cpu/sh4/perf_event.c index dbf3b4bb71fe..748955df018d 100644 --- a/arch/sh/kernel/cpu/sh4/perf_event.c +++ b/arch/sh/kernel/cpu/sh4/perf_event.c @@ -250,4 +250,4 @@ static int __init sh7750_pmu_init(void) return register_sh_pmu(&sh7750_pmu); } -arch_initcall(sh7750_pmu_init); +early_initcall(sh7750_pmu_init); diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c index 580276525731..17e6bebfede0 100644 --- a/arch/sh/kernel/cpu/sh4a/perf_event.c +++ b/arch/sh/kernel/cpu/sh4a/perf_event.c @@ -284,4 +284,4 @@ static int __init sh4a_pmu_init(void) return register_sh_pmu(&sh4a_pmu); } -arch_initcall(sh4a_pmu_init); +early_initcall(sh4a_pmu_init); diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 5a4b33435650..2ee21a47b5af 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -389,7 +389,7 @@ int __cpuinit register_sh_pmu(struct sh_pmu *_pmu) WARN_ON(_pmu->num_events > MAX_HWEVENTS); - perf_pmu_register(&pmu); + perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); perf_cpu_notifier(sh_pmu_notifier); return 0; } diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h index 6e8bfa1786da..4d3dbe3703e9 100644 --- a/arch/sparc/include/asm/perf_event.h +++ b/arch/sparc/include/asm/perf_event.h @@ -4,8 +4,6 @@ #ifdef CONFIG_PERF_EVENTS #include <asm/ptrace.h> -extern void init_hw_perf_events(void); - #define perf_arch_fetch_caller_regs(regs, ip) \ do { \ unsigned long _pstate, _asi, _pil, _i7, _fp; \ @@ -26,8 +24,6 @@ do { \ (regs)->u_regs[UREG_I6] = _fp; \ (regs)->u_regs[UREG_I7] = _i7; \ } while (0) -#else -static inline void init_hw_perf_events(void) { } #endif #endif diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index a4bd7ba74c89..300f810142f5 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -270,8 +270,6 @@ int __init nmi_init(void) atomic_set(&nmi_active, -1); } } - if (!err) - init_hw_perf_events(); return err; } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 0d6deb55a2ae..760578687e7c 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1307,20 +1307,23 @@ static bool __init supported_pmu(void) return false; } -void __init init_hw_perf_events(void) +int __init init_hw_perf_events(void) { pr_info("Performance events: "); 
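The powerpc, sh, sparc and (further below) x86 hunks all make the same conversion: explicit init_hw_perf_events() calls and arch_initcall()s become early_initcall()s, and perf_pmu_register() grows name and type arguments. Reduced to a sketch, every port converges on the same shape (supported_pmu() and pmu stand in for each port's own symbols):

	static int __init init_hw_perf_events(void)
	{
		if (!supported_pmu())
			return 0;	/* missing hardware is not an initcall error */

		perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
		return 0;
	}
	early_initcall(init_hw_perf_events);

The void-to-int signature changes in this section follow from this: early_initcall() expects an initcall-style int (*)(void).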
if (!supported_pmu()) { pr_cont("No support for PMU type '%s'\n", sparc_pmu_type); - return; + return 0; } pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); - perf_pmu_register(&pmu); + perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); register_die_notifier(&perf_event_nmi_notifier); + + return 0; } +early_initcall(init_hw_perf_events); void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) diff --git a/arch/tile/include/asm/signal.h b/arch/tile/include/asm/signal.h index c1ee1d61d44c..81d92a45cd4b 100644 --- a/arch/tile/include/asm/signal.h +++ b/arch/tile/include/asm/signal.h @@ -25,7 +25,7 @@ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) struct pt_regs; -int restore_sigcontext(struct pt_regs *, struct sigcontext __user *, long *); +int restore_sigcontext(struct pt_regs *, struct sigcontext __user *); int setup_sigcontext(struct sigcontext __user *, struct pt_regs *); void do_signal(struct pt_regs *regs); #endif diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index 543d6a33aa26..dbb0dfc7bece 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -290,12 +290,12 @@ long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, return ret; } +/* The assembly shim for this function arranges to ignore the return value. */ long compat_sys_rt_sigreturn(struct pt_regs *regs) { struct compat_rt_sigframe __user *frame = (struct compat_rt_sigframe __user *) compat_ptr(regs->sp); sigset_t set; - long r0; if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; @@ -308,13 +308,13 @@ long compat_sys_rt_sigreturn(struct pt_regs *regs) recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &r0)) + if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; if (compat_sys_sigaltstack(&frame->uc.uc_stack, NULL, regs) != 0) goto badframe; - return r0; + return 0; badframe: force_sig(SIGSEGV, current); diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index f5821626247f..5eed4a02bf62 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -1342,8 +1342,8 @@ handle_syscall: lw r20, r20 /* Jump to syscall handler. */ - jalr r20; .Lhandle_syscall_link: - FEEDBACK_REENTER(handle_syscall) + jalr r20 +.Lhandle_syscall_link: /* value of "lr" after "jalr r20" above */ /* * Write our r0 onto the stack so it gets restored instead @@ -1352,6 +1352,9 @@ handle_syscall: PTREGS_PTR(r29, PTREGS_OFFSET_REG(0)) sw r29, r0 +.Lsyscall_sigreturn_skip: + FEEDBACK_REENTER(handle_syscall) + /* Do syscall trace again, if requested. */ lw r30, r31 andi r30, r30, _TIF_SYSCALL_TRACE @@ -1536,9 +1539,24 @@ STD_ENTRY_LOCAL(bad_intr) }; \ STD_ENDPROC(_##x) +/* + * Special-case sigreturn to not write r0 to the stack on return. + * This is technically more efficient, but it also avoids difficulties + * in the 64-bit OS when handling 32-bit compat code, since we must not + * sign-extend r0 for the sigreturn return-value case. 
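/* (A distilled view of what the sigreturn shim machinery achieves - a
 *  sketch, not the kernel code. On the normal syscall exit path the
 *  handler's return value is stored back into the saved registers:
 *
 *	retval = handler(...);
 *	regs->regs[0] = retval;		// the "sw r29, r0" store above
 *
 *  For rt_sigreturn the whole register set, r0 included, was just
 *  reloaded from the signal frame; storing retval would clobber it and,
 *  on a 64-bit kernel running 32-bit compat code, sign-extend it.
 *  So the shim advances lr past that store, and the C function's
 *  "return 0" is never consumed.) */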
+ */ +#define PTREGS_SYSCALL_SIGRETURN(x, reg) \ + STD_ENTRY(_##x); \ + addli lr, lr, .Lsyscall_sigreturn_skip - .Lhandle_syscall_link; \ + { \ + PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \ + j x \ + }; \ + STD_ENDPROC(_##x) + PTREGS_SYSCALL(sys_execve, r3) PTREGS_SYSCALL(sys_sigaltstack, r2) -PTREGS_SYSCALL(sys_rt_sigreturn, r0) +PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0) PTREGS_SYSCALL(sys_cmpxchg_badaddr, r1) /* Save additional callee-saves to pt_regs, put address in r4 and jump. */ diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 8430f45daea6..e90eb53173b0 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -212,6 +212,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, childregs->sp = sp; /* override with new user stack pointer */ /* + * If CLONE_SETTLS is set, set "tp" in the new task to "r4", + * which is passed in as arg #5 to sys_clone(). + */ + if (clone_flags & CLONE_SETTLS) + childregs->tp = regs->regs[4]; + + /* * Copy the callee-saved registers from the passed pt_regs struct * into the context-switch callee-saved registers area. * This way when we start the interrupt-return sequence, the @@ -539,6 +546,7 @@ struct task_struct *__sched _switch_to(struct task_struct *prev, return __switch_to(prev, next, next_current_ksp0(next)); } +/* Note there is an implicit fifth argument if (clone_flags & CLONE_SETTLS). */ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, void __user *, parent_tidptr, void __user *, child_tidptr, struct pt_regs *, regs) diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 757407e36696..1260321155f1 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -52,7 +52,7 @@ SYSCALL_DEFINE3(sigaltstack, const stack_t __user *, uss, */ int restore_sigcontext(struct pt_regs *regs, - struct sigcontext __user *sc, long *pr0) + struct sigcontext __user *sc) { int err = 0; int i; @@ -75,17 +75,15 @@ int restore_sigcontext(struct pt_regs *regs, regs->faultnum = INT_SWINT_1_SIGRETURN; - err |= __get_user(*pr0, &sc->gregs[0]); return err; } -/* sigreturn() returns long since it restores r0 in the interrupted code. */ +/* The assembly shim for this function arranges to ignore the return value. 
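/* (User-side illustration of the clone() contract described in the
 *  copy_thread() and sys_clone() comments above - a hedged sketch
 *  assuming the libc syscall() stub marshals the kernel's fifth
 *  argument into r4 on tile; clone_with_tls() is hypothetical, and
 *  glibc's clone() wrapper has a different signature entirely.) */
#include <sched.h>
#include <unistd.h>
#include <sys/syscall.h>

static long clone_with_tls(unsigned long flags, void *child_sp,
			   int *parent_tid, int *child_tid, void *tls)
{
	/* tls rides as the implicit fifth syscall argument (r4) */
	return syscall(SYS_clone, flags | CLONE_SETTLS, child_sp,
		       parent_tid, child_tid, tls);
}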
*/ SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs) { struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->sp); sigset_t set; - long r0; if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; @@ -98,13 +96,13 @@ SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs) recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &r0)) + if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) goto badframe; - return r0; + return 0; badframe: force_sig(SIGSEGV, current); diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 23f315c9f215..325c05294fc4 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -355,7 +355,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, if (heap > 0x3fffffffffffUL) error("Destination address too large"); #else - if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff)) + if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff)) error("Destination address too large"); #endif #ifndef CONFIG_RELOCATABLE diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 76561d20ea2f..4a2adaa9aefc 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -180,8 +180,15 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len); * On the local CPU you need to be protected again NMI or MCE handlers seeing an * inconsistent instruction while you patch. */ +struct text_poke_param { + void *addr; + const void *opcode; + size_t len; +}; + extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_smp(void *addr, const void *opcode, size_t len); +extern void text_poke_smp_batch(struct text_poke_param *params, int n); #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) #define IDEAL_NOP_SIZE_5 5 diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 5be1542fbfaf..e99d55d74df5 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -72,6 +72,9 @@ struct e820map { #define BIOS_BEGIN 0x000a0000 #define BIOS_END 0x00100000 +#define BIOS_ROM_BASE 0xffe00000 +#define BIOS_ROM_END 0xffffffff + #ifdef __KERNEL__ /* see comment in arch/x86/kernel/e820.c */ extern struct e820map e820; diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 13b0ebaa512f..ba870bb6dd8e 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -15,10 +15,6 @@ static inline int irq_canonicalize(int irq) return ((irq == 2) ? 
9 : irq); } -#ifdef CONFIG_X86_LOCAL_APIC -# define ARCH_HAS_NMI_WATCHDOG -#endif - #ifdef CONFIG_X86_32 extern void irq_ctx_init(int cpu); #else diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 5bdfca86581b..f23eb2528464 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -28,7 +28,7 @@ extern void die(const char *, struct pt_regs *,long); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_registers(struct pt_regs *regs); extern void show_trace(struct task_struct *t, struct pt_regs *regs, - unsigned long *sp, unsigned long bp); + unsigned long *sp); extern void __show_regs(struct pt_regs *regs, int all); extern void show_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9e6fe391094e..f702f82aa1eb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -79,7 +79,7 @@ #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 -#define KVM_MAX_CPUID_ENTRIES 40 +#define KVM_MAX_CPUID_ENTRIES 80 #define KVM_NR_FIXED_MTRR_REGION 88 #define KVM_NR_VAR_MTRR 8 diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 932f0f86b4b7..c4021b953510 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -5,41 +5,15 @@ #include <asm/irq.h> #include <asm/io.h> -#ifdef ARCH_HAS_NMI_WATCHDOG - -/** - * do_nmi_callback - * - * Check to see if a callback exists and execute it. Return 1 - * if the handler exists and was handled successfully. - */ -int do_nmi_callback(struct pt_regs *regs, int cpu); +#ifdef CONFIG_X86_LOCAL_APIC extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); -extern int check_nmi_watchdog(void); -#if !defined(CONFIG_LOCKUP_DETECTOR) -extern int nmi_watchdog_enabled; -#endif extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int reserve_perfctr_nmi(unsigned int); extern void release_perfctr_nmi(unsigned int); extern int reserve_evntsel_nmi(unsigned int); extern void release_evntsel_nmi(unsigned int); -extern void setup_apic_nmi_watchdog(void *); -extern void stop_apic_nmi_watchdog(void *); -extern void disable_timer_nmi_watchdog(void); -extern void enable_timer_nmi_watchdog(void); -extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason); -extern void cpu_nmi_set_wd_enabled(void); - -extern atomic_t nmi_active; -extern unsigned int nmi_watchdog; -#define NMI_NONE 0 -#define NMI_IO_APIC 1 -#define NMI_LOCAL_APIC 2 -#define NMI_INVALID 3 - struct ctl_table; extern int proc_nmi_enabled(struct ctl_table *, int , void __user *, size_t *, loff_t *); @@ -47,33 +21,8 @@ extern int unknown_nmi_panic; void arch_trigger_all_cpu_backtrace(void); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace - -static inline void localise_nmi_watchdog(void) -{ - if (nmi_watchdog == NMI_IO_APIC) - nmi_watchdog = NMI_LOCAL_APIC; -} - -/* check if nmi_watchdog is active (ie was specified at boot) */ -static inline int nmi_watchdog_active(void) -{ - /* - * actually it should be: - * return (nmi_watchdog == NMI_LOCAL_APIC || - * nmi_watchdog == NMI_IO_APIC) - * but since they are power of two we could use a - * cheaper way --cvg - */ - return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC); -} #endif -void lapic_watchdog_stop(void); -int lapic_watchdog_init(unsigned nmi_hz); -int lapic_wd_event(unsigned nmi_hz); -unsigned 
lapic_adjust_nmi_hz(unsigned hz); -void disable_lapic_nmi_watchdog(void); -void enable_lapic_nmi_watchdog(void); void stop_nmi(void); void restart_nmi(void); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 550e26b1dbb3..d9d4dae305f6 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -125,7 +125,6 @@ union cpuid10_edx { #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ #ifdef CONFIG_PERF_EVENTS -extern void init_hw_perf_events(void); extern void perf_events_lapic_init(void); #define PERF_EVENT_INDEX_OFFSET 0 @@ -156,7 +155,6 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); } #else -static inline void init_hw_perf_events(void) { } static inline void perf_events_lapic_init(void) { } #endif diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index a70cd216be5d..295e2ff18a6a 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -744,14 +744,6 @@ enum P4_ESCR_EMASKS { }; /* - * P4 PEBS specifics (Replay Event only) - * - * Format (bits): - * 0-6: metric from P4_PEBS_METRIC enum - * 7 : reserved - * 8 : reserved - * 9-11 : reserved - * * Note we have UOP and PEBS bits reserved for now * just in case if we will need them once */ @@ -788,5 +780,60 @@ enum P4_PEBS_METRIC { P4_PEBS_METRIC__max }; +/* + * Notes on the internal configuration of ESCR+CCCR tuples + * + * Since the P4 architecture of performance registers is quite + * different from the "architectural" one, and we have only 64 bits + * to keep the configuration of a performance event, the following + * trick is used. + * + * 1) Since both ESCR and CCCR registers have only their low + * 32 bits valid, we pack them into a single 64 bit + * configuration. The low 32 bits of such a config correspond + * to the low 32 bits of the CCCR register and the high 32 bits + * correspond to the low 32 bits of the ESCR register. + * + * 2) The meaning of every bit of such a config field can + * be found in the Intel SDM, but note that + * we "borrow" some reserved bits for our own usage and + * clear them or set them to a proper value when we do + * a real write to the hardware registers.
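/* (Sketch of the packing that points 1) and 2) describe; the helper
 *  names here are illustrative - the real header provides
 *  p4_config_pack_escr()/p4_config_unpack_escr() and friends.) */
static inline u64 p4_pack_config(u32 escr, u32 cccr)
{
	/* high 32 bits <- low 32 of ESCR, low 32 bits <- low 32 of CCCR */
	return ((u64)escr << 32) | cccr;
}

static inline u32 p4_unpack_escr(u64 config) { return (u32)(config >> 32); }
static inline u32 p4_unpack_cccr(u64 config) { return (u32)config; }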
+ * + * 3) The format of bits of config is the following + * and should be either 0 or set to some predefined + * values: + * + * Low 32 bits + * ----------- + * 0-6: P4_PEBS_METRIC enum + * 7-11: reserved + * 12: reserved (Enable) + * 13-15: reserved (ESCR select) + * 16-17: Active Thread + * 18: Compare + * 19: Complement + * 20-23: Threshold + * 24: Edge + * 25: reserved (FORCE_OVF) + * 26: reserved (OVF_PMI_T0) + * 27: reserved (OVF_PMI_T1) + * 28-29: reserved + * 30: reserved (Cascade) + * 31: reserved (OVF) + * + * High 32 bits + * ------------ + * 0: reserved (T1_USR) + * 1: reserved (T1_OS) + * 2: reserved (T0_USR) + * 3: reserved (T0_OS) + * 4: Tag Enable + * 5-8: Tag Value + * 9-24: Event Mask (may use P4_ESCR_EMASK_BIT helper) + * 25-30: enum P4_EVENTS + * 31: reserved (HT thread) + */ + #endif /* PERF_EVENT_P4_H */ diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h index 1def60114906..6c22bf353f26 100644 --- a/arch/x86/include/asm/smpboot_hooks.h +++ b/arch/x86/include/asm/smpboot_hooks.h @@ -48,7 +48,6 @@ static inline void __init smpboot_setup_io_apic(void) setup_IO_APIC(); else { nr_ioapics = 0; - localise_nmi_watchdog(); } #endif } diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 2b16a2ad23dc..52b5c7ed3608 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -7,6 +7,7 @@ #define _ASM_X86_STACKTRACE_H #include <linux/uaccess.h> +#include <linux/ptrace.h> extern int kstack_depth_to_print; @@ -46,7 +47,7 @@ struct stacktrace_ops { }; void dump_trace(struct task_struct *tsk, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, + unsigned long *stack, const struct stacktrace_ops *ops, void *data); #ifdef CONFIG_X86_32 @@ -57,13 +58,39 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) #endif +#ifdef CONFIG_FRAME_POINTER +static inline unsigned long +stack_frame(struct task_struct *task, struct pt_regs *regs) +{ + unsigned long bp; + + if (regs) + return regs->bp; + + if (task == current) { + /* Grab bp right from our regs */ + get_bp(bp); + return bp; + } + + /* bp is the last reg pushed by switch_to */ + return *(unsigned long *)task->thread.sp; +} +#else +static inline unsigned long +stack_frame(struct task_struct *task, struct pt_regs *regs) +{ + return 0; +} +#endif + extern void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl); + unsigned long *stack, char *log_lvl); extern void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl); + unsigned long *sp, char *log_lvl); extern unsigned int code_bytes; diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 5469630b27f5..fa7b9176b76c 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -10,12 +10,6 @@ unsigned long long native_sched_clock(void); extern int recalibrate_cpu_khz(void); -#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) -extern int timer_ack; -#else -# define timer_ack (0) -#endif - extern int no_timer_check; /* Accelerators for sched_clock() diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9e13763b6092..1e994754d323 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -45,6 +45,7 @@ obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o obj-y += alternative.o i8253.o pci-nommu.o 
hw_breakpoint.o obj-y += tsc.o io_delay.o rtc.o obj-y += pci-iommu_table.o +obj-y += resource.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 5079f24c955a..553d0b0d639b 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -591,17 +591,21 @@ static atomic_t stop_machine_first; static int wrote_text; struct text_poke_params { - void *addr; - const void *opcode; - size_t len; + struct text_poke_param *params; + int nparams; }; static int __kprobes stop_machine_text_poke(void *data) { struct text_poke_params *tpp = data; + struct text_poke_param *p; + int i; if (atomic_dec_and_test(&stop_machine_first)) { - text_poke(tpp->addr, tpp->opcode, tpp->len); + for (i = 0; i < tpp->nparams; i++) { + p = &tpp->params[i]; + text_poke(p->addr, p->opcode, p->len); + } smp_wmb(); /* Make sure other cpus see that this has run */ wrote_text = 1; } else { @@ -610,8 +614,12 @@ static int __kprobes stop_machine_text_poke(void *data) smp_mb(); /* Load wrote_text before following execution */ } - flush_icache_range((unsigned long)tpp->addr, - (unsigned long)tpp->addr + tpp->len); + for (i = 0; i < tpp->nparams; i++) { + p = &tpp->params[i]; + flush_icache_range((unsigned long)p->addr, + (unsigned long)p->addr + p->len); + } + return 0; } @@ -631,10 +639,13 @@ static int __kprobes stop_machine_text_poke(void *data) void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) { struct text_poke_params tpp; + struct text_poke_param p; - tpp.addr = addr; - tpp.opcode = opcode; - tpp.len = len; + p.addr = addr; + p.opcode = opcode; + p.len = len; + tpp.params = &p; + tpp.nparams = 1; atomic_set(&stop_machine_first, 1); wrote_text = 0; /* Use __stop_machine() because the caller already got online_cpus. */ @@ -642,6 +653,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) return addr; } +/** + * text_poke_smp_batch - Update instructions on a live kernel on SMP + * @params: an array of text_poke parameters + * @n: the number of elements in params. + * + * Modify multi-byte instructions by using stop_machine() on SMP. Since + * stop_machine() is a heavy task, it is better to aggregate text_poke requests + * and do them all at once if possible. + * + * Note: Must be called under get_online_cpus() and text_mutex.
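/* (Hedged usage sketch of the convention this kernel-doc sets out;
 *  addr1/insn1 etc. are placeholders, while the locking calls are the
 *  real APIs named in the Note above.) */
struct text_poke_param batch[2] = {
	{ .addr = addr1, .opcode = insn1, .len = len1 },
	{ .addr = addr2, .opcode = insn2, .len = len2 },
};

get_online_cpus();
mutex_lock(&text_mutex);
text_poke_smp_batch(batch, 2);	/* one stop_machine() for the whole batch */
mutex_unlock(&text_mutex);
put_online_cpus();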
+ */ +void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n) +{ + struct text_poke_params tpp = {.params = params, .nparams = n}; + + atomic_set(&stop_machine_first, 1); + wrote_text = 0; + stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); +} + #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 910f20b457c4..3966b564ea47 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -3,10 +3,7 @@ # obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o -ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y) -obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o -endif -obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o +obj-y += hw_nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_SMP) += ipi.o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3f838d537392..fb7657822aad 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -31,7 +31,6 @@ #include <linux/init.h> #include <linux/cpu.h> #include <linux/dmi.h> -#include <linux/nmi.h> #include <linux/smp.h> #include <linux/mm.h> @@ -799,11 +798,7 @@ void __init setup_boot_APIC_clock(void) * PIT/HPET going. Otherwise register lapic as a dummy * device. */ - if (nmi_watchdog != NMI_IO_APIC) - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; - else - pr_warning("APIC timer registered as dummy," - " due to nmi_watchdog=%d!\n", nmi_watchdog); + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; /* Setup the lapic or request the broadcast */ setup_APIC_timer(); @@ -1387,8 +1382,15 @@ void __cpuinit end_local_APIC_setup(void) } #endif - setup_apic_nmi_watchdog(NULL); apic_pm_activate(); + + /* + * Now that local APIC setup is completed for BP, configure the fault + * handling for interrupt remapping. + */ + if (!smp_processor_id() && intr_remapping_enabled) + enable_drhd_fault_handling(); + } #ifdef CONFIG_X86_X2APIC @@ -1750,17 +1752,10 @@ int __init APIC_init_uniprocessor(void) setup_IO_APIC(); else { nr_ioapics = 0; - localise_nmi_watchdog(); } -#else - localise_nmi_watchdog(); #endif x86_init.timers.setup_percpu_clockev(); -#ifdef CONFIG_X86_64 - check_nmi_watchdog(); -#endif - return 0; } diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 62f6e1e55b90..c57d0b599448 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -17,13 +17,14 @@ #include <linux/nmi.h> #include <linux/module.h> +#ifdef CONFIG_HARDLOCKUP_DETECTOR u64 hw_nmi_get_sample_period(void) { return (u64)(cpu_khz) * 1000 * 60; } +#endif -#ifdef ARCH_HAS_NMI_WATCHDOG - +#ifdef arch_trigger_all_cpu_backtrace /* For reliability, we're prepared to waste bits here. 
*/ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; @@ -91,18 +92,3 @@ static int __init register_trigger_all_cpu_backtrace(void) } early_initcall(register_trigger_all_cpu_backtrace); #endif - -/* STUB calls to mimic old nmi_watchdog behaviour */ -#if defined(CONFIG_X86_LOCAL_APIC) -unsigned int nmi_watchdog = NMI_NONE; -EXPORT_SYMBOL(nmi_watchdog); -void acpi_nmi_enable(void) { return; } -void acpi_nmi_disable(void) { return; } -#endif -atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ -EXPORT_SYMBOL(nmi_active); -int unknown_nmi_panic; -void cpu_nmi_set_wd_enabled(void) { return; } -void stop_apic_nmi_watchdog(void *unused) { return; } -void setup_apic_nmi_watchdog(void *unused) { return; } -int __init check_nmi_watchdog(void) { return 0; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 7cc0a721f628..16c2db8750a2 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -54,7 +54,6 @@ #include <asm/dma.h> #include <asm/timer.h> #include <asm/i8259.h> -#include <asm/nmi.h> #include <asm/msidef.h> #include <asm/hypertransport.h> #include <asm/setup.h> @@ -2430,13 +2429,12 @@ static void ack_apic_level(struct irq_data *data) { struct irq_cfg *cfg = data->chip_data; int i, do_unmask_irq = 0, irq = data->irq; - struct irq_desc *desc = irq_to_desc(irq); unsigned long v; irq_complete_move(cfg); #ifdef CONFIG_GENERIC_PENDING_IRQ /* If we are moving the irq we need to mask it */ - if (unlikely(desc->status & IRQ_MOVE_PENDING)) { + if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { do_unmask_irq = 1; mask_ioapic(cfg); } @@ -2643,24 +2641,6 @@ static void lapic_register_intr(int irq) "edge"); } -static void __init setup_nmi(void) -{ - /* - * Dirty trick to enable the NMI watchdog ... - * We put the 8259A master into AEOI mode and - * unmask on all local APICs LVT0 as NMI. - * - * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') - * is from Maciej W. Rozycki - so we do not have to EOI from - * the NMI handler or the timer interrupt. - */ - apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); - - enable_NMI_through_LVT0(); - - apic_printk(APIC_VERBOSE, " done.\n"); -} - /* * This looks a bit hackish but it's about the only one way of sending * a few INTA cycles to 8259As and any associated glue logic. ICR does @@ -2766,15 +2746,6 @@ static inline void __init check_timer(void) */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); legacy_pic->init(1); -#ifdef CONFIG_X86_32 - { - unsigned int ver; - - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); - timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); - } -#endif pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -2822,10 +2793,6 @@ static inline void __init check_timer(void) unmask_ioapic(cfg); } if (timer_irq_works()) { - if (nmi_watchdog == NMI_IO_APIC) { - setup_nmi(); - legacy_pic->unmask(0); - } if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(0, pin1); goto out; @@ -2851,11 +2818,6 @@ static inline void __init check_timer(void) if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); timer_through_8259 = 1; - if (nmi_watchdog == NMI_IO_APIC) { - legacy_pic->mask(0); - setup_nmi(); - legacy_pic->unmask(0); - } goto out; } /* @@ -2867,15 +2829,6 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "....... 
failed.\n"); } - if (nmi_watchdog == NMI_IO_APIC) { - apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " - "through the IO-APIC - disabling NMI Watchdog!\n"); - nmi_watchdog = NMI_NONE; - } -#ifdef CONFIG_X86_32 - timer_ack = 0; -#endif - apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); @@ -3413,6 +3366,7 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, msg.data |= MSI_DATA_VECTOR(cfg->vector); msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; msg.address_lo |= MSI_ADDR_DEST_ID(dest); + msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest); dmar_msi_write(irq, &msg); diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c deleted file mode 100644 index c90041ccb742..000000000000 --- a/arch/x86/kernel/apic/nmi.c +++ /dev/null @@ -1,567 +0,0 @@ -/* - * NMI watchdog support on APIC systems - * - * Started by Ingo Molnar <mingo@redhat.com> - * - * Fixes: - * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. - * Mikael Pettersson : Power Management for local APIC NMI watchdog. - * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. - * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. Disable/enable API. - */ - -#include <asm/apic.h> - -#include <linux/nmi.h> -#include <linux/mm.h> -#include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/sysdev.h> -#include <linux/sysctl.h> -#include <linux/percpu.h> -#include <linux/kprobes.h> -#include <linux/cpumask.h> -#include <linux/kernel_stat.h> -#include <linux/kdebug.h> -#include <linux/smp.h> - -#include <asm/i8259.h> -#include <asm/io_apic.h> -#include <asm/proto.h> -#include <asm/timer.h> - -#include <asm/mce.h> - -#include <asm/mach_traps.h> - -int unknown_nmi_panic; -int nmi_watchdog_enabled; - -/* For reliability, we're prepared to waste bits here. */ -static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; - -/* nmi_active: - * >0: the lapic NMI watchdog is active, but can be disabled - * <0: the lapic NMI watchdog has not been set up, and cannot - * be enabled - * 0: the lapic NMI watchdog is disabled, but can be enabled - */ -atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ -EXPORT_SYMBOL(nmi_active); - -unsigned int nmi_watchdog = NMI_NONE; -EXPORT_SYMBOL(nmi_watchdog); - -static int panic_on_timeout; - -static unsigned int nmi_hz = HZ; -static DEFINE_PER_CPU(short, wd_enabled); -static int endflag __initdata; - -static inline unsigned int get_nmi_count(int cpu) -{ - return per_cpu(irq_stat, cpu).__nmi_count; -} - -static inline int mce_in_progress(void) -{ -#if defined(CONFIG_X86_MCE) - return atomic_read(&mce_entry) > 0; -#endif - return 0; -} - -/* - * Take the local apic timer and PIT/HPET into account. We don't - * know which one is active, when we have highres/dyntick on - */ -static inline unsigned int get_timer_irqs(int cpu) -{ - return per_cpu(irq_stat, cpu).apic_timer_irqs + - per_cpu(irq_stat, cpu).irq0_irqs; -} - -#ifdef CONFIG_SMP -/* - * The performance counters used by NMI_LOCAL_APIC don't trigger when - * the CPU is idle. To make sure the NMI watchdog really ticks on all - * CPUs during the test make them busy. - */ -static __init void nmi_cpu_busy(void *data) -{ - local_irq_enable_in_hardirq(); - /* - * Intentionally don't use cpu_relax here. This is - * to make sure that the performance counter really ticks, - * even if there is a simulator or similar that catches the - * pause instruction. 
On a real HT machine this is fine because - * all other CPUs are busy with "useless" delay loops and don't - * care if they get somewhat less cycles. - */ - while (endflag == 0) - mb(); -} -#endif - -static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count) -{ - printk(KERN_CONT "\n"); - - printk(KERN_WARNING - "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", - cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); - - printk(KERN_WARNING - "Please report this to bugzilla.kernel.org,\n"); - printk(KERN_WARNING - "and attach the output of the 'dmesg' command.\n"); - - per_cpu(wd_enabled, cpu) = 0; - atomic_dec(&nmi_active); -} - -static void __acpi_nmi_disable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); -} - -int __init check_nmi_watchdog(void) -{ - unsigned int *prev_nmi_count; - int cpu; - - if (!nmi_watchdog_active() || !atomic_read(&nmi_active)) - return 0; - - prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); - if (!prev_nmi_count) - goto error; - - printk(KERN_INFO "Testing NMI watchdog ... "); - -#ifdef CONFIG_SMP - if (nmi_watchdog == NMI_LOCAL_APIC) - smp_call_function(nmi_cpu_busy, (void *)&endflag, 0); -#endif - - for_each_possible_cpu(cpu) - prev_nmi_count[cpu] = get_nmi_count(cpu); - local_irq_enable(); - mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ - - for_each_online_cpu(cpu) { - if (!per_cpu(wd_enabled, cpu)) - continue; - if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) - report_broken_nmi(cpu, prev_nmi_count); - } - endflag = 1; - if (!atomic_read(&nmi_active)) { - kfree(prev_nmi_count); - atomic_set(&nmi_active, -1); - goto error; - } - printk("OK.\n"); - - /* - * now that we know it works we can reduce NMI frequency to - * something more reasonable; makes a difference in some configs - */ - if (nmi_watchdog == NMI_LOCAL_APIC) - nmi_hz = lapic_adjust_nmi_hz(1); - - kfree(prev_nmi_count); - return 0; -error: - if (nmi_watchdog == NMI_IO_APIC) { - if (!timer_through_8259) - legacy_pic->mask(0); - on_each_cpu(__acpi_nmi_disable, NULL, 1); - } - -#ifdef CONFIG_X86_32 - timer_ack = 0; -#endif - return -1; -} - -static int __init setup_nmi_watchdog(char *str) -{ - unsigned int nmi; - - if (!strncmp(str, "panic", 5)) { - panic_on_timeout = 1; - str = strchr(str, ','); - if (!str) - return 1; - ++str; - } - - if (!strncmp(str, "lapic", 5)) - nmi_watchdog = NMI_LOCAL_APIC; - else if (!strncmp(str, "ioapic", 6)) - nmi_watchdog = NMI_IO_APIC; - else { - get_option(&str, &nmi); - if (nmi >= NMI_INVALID) - return 0; - nmi_watchdog = nmi; - } - - return 1; -} -__setup("nmi_watchdog=", setup_nmi_watchdog); - -/* - * Suspend/resume support - */ -#ifdef CONFIG_PM - -static int nmi_pm_active; /* nmi_active before suspend */ - -static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) -{ - /* only CPU0 goes here, other CPUs should be offline */ - nmi_pm_active = atomic_read(&nmi_active); - stop_apic_nmi_watchdog(NULL); - BUG_ON(atomic_read(&nmi_active) != 0); - return 0; -} - -static int lapic_nmi_resume(struct sys_device *dev) -{ - /* only CPU0 goes here, other CPUs should be offline */ - if (nmi_pm_active > 0) { - setup_apic_nmi_watchdog(NULL); - touch_nmi_watchdog(); - } - return 0; -} - -static struct sysdev_class nmi_sysclass = { - .name = "lapic_nmi", - .resume = lapic_nmi_resume, - .suspend = lapic_nmi_suspend, -}; - -static struct sys_device device_lapic_nmi = { - .id = 0, - .cls = &nmi_sysclass, -}; - -static int __init init_lapic_nmi_sysfs(void) -{ - int error; - - /* - * should really be a BUG_ON but b/c 
this is an - * init call, it just doesn't work. -dcz - */ - if (nmi_watchdog != NMI_LOCAL_APIC) - return 0; - - if (atomic_read(&nmi_active) < 0) - return 0; - - error = sysdev_class_register(&nmi_sysclass); - if (!error) - error = sysdev_register(&device_lapic_nmi); - return error; -} - -/* must come after the local APIC's device_initcall() */ -late_initcall(init_lapic_nmi_sysfs); - -#endif /* CONFIG_PM */ - -static void __acpi_nmi_enable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI); -} - -/* - * Enable timer based NMIs on all CPUs: - */ -void acpi_nmi_enable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_enable, NULL, 1); -} - -/* - * Disable timer based NMIs on all CPUs: - */ -void acpi_nmi_disable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_disable, NULL, 1); -} - -/* - * This function is called as soon the LAPIC NMI watchdog driver has everything - * in place and it's ready to check if the NMIs belong to the NMI watchdog - */ -void cpu_nmi_set_wd_enabled(void) -{ - __get_cpu_var(wd_enabled) = 1; -} - -void setup_apic_nmi_watchdog(void *unused) -{ - if (__get_cpu_var(wd_enabled)) - return; - - /* cheap hack to support suspend/resume */ - /* if cpu0 is not active neither should the other cpus */ - if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) - return; - - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - if (lapic_watchdog_init(nmi_hz) < 0) { - __get_cpu_var(wd_enabled) = 0; - return; - } - /* FALL THROUGH */ - case NMI_IO_APIC: - __get_cpu_var(wd_enabled) = 1; - atomic_inc(&nmi_active); - } -} - -void stop_apic_nmi_watchdog(void *unused) -{ - /* only support LOCAL and IO APICs for now */ - if (!nmi_watchdog_active()) - return; - if (__get_cpu_var(wd_enabled) == 0) - return; - if (nmi_watchdog == NMI_LOCAL_APIC) - lapic_watchdog_stop(); - else - __acpi_nmi_disable(NULL); - __get_cpu_var(wd_enabled) = 0; - atomic_dec(&nmi_active); -} - -/* - * the best way to detect whether a CPU has a 'hard lockup' problem - * is to check it's local APIC timer IRQ counts. If they are not - * changing then that CPU has some problem. - * - * as these watchdog NMI IRQs are generated on every CPU, we only - * have to check the current processor. - * - * since NMIs don't listen to _any_ locks, we have to be extremely - * careful not to rely on unsafe variables. The printk might lock - * up though, so we have to break up any console locks first ... - * [when there will be more tty-related locks, break them up here too!] - */ - -static DEFINE_PER_CPU(unsigned, last_irq_sum); -static DEFINE_PER_CPU(long, alert_counter); -static DEFINE_PER_CPU(int, nmi_touch); - -void touch_nmi_watchdog(void) -{ - if (nmi_watchdog_active()) { - unsigned cpu; - - /* - * Tell other CPUs to reset their alert counters. We cannot - * do it ourselves because the alert count increase is not - * atomic. - */ - for_each_present_cpu(cpu) { - if (per_cpu(nmi_touch, cpu) != 1) - per_cpu(nmi_touch, cpu) = 1; - } - } - - /* - * Tickle the softlockup detector too: - */ - touch_softlockup_watchdog(); -} -EXPORT_SYMBOL(touch_nmi_watchdog); - -notrace __kprobes int -nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) -{ - /* - * Since current_thread_info()-> is always on the stack, and we - * always switch the stack NMI-atomically, it's safe to use - * smp_processor_id(). 
- */ - unsigned int sum; - int touched = 0; - int cpu = smp_processor_id(); - int rc = 0; - - sum = get_timer_irqs(cpu); - - if (__get_cpu_var(nmi_touch)) { - __get_cpu_var(nmi_touch) = 0; - touched = 1; - } - - /* We can be called before check_nmi_watchdog, hence NULL check. */ - if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */ - - raw_spin_lock(&lock); - printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); - show_regs(regs); - dump_stack(); - raw_spin_unlock(&lock); - cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); - - rc = 1; - } - - /* Could check oops_in_progress here too, but it's safer not to */ - if (mce_in_progress()) - touched = 1; - - /* if the none of the timers isn't firing, this cpu isn't doing much */ - if (!touched && __get_cpu_var(last_irq_sum) == sum) { - /* - * Ayiee, looks like this CPU is stuck ... - * wait a few IRQs (5 seconds) before doing the oops ... - */ - __this_cpu_inc(alert_counter); - if (__this_cpu_read(alert_counter) == 5 * nmi_hz) - /* - * die_nmi will return ONLY if NOTIFY_STOP happens.. - */ - die_nmi("BUG: NMI Watchdog detected LOCKUP", - regs, panic_on_timeout); - } else { - __get_cpu_var(last_irq_sum) = sum; - __this_cpu_write(alert_counter, 0); - } - - /* see if the nmi watchdog went off */ - if (!__get_cpu_var(wd_enabled)) - return rc; - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - rc |= lapic_wd_event(nmi_hz); - break; - case NMI_IO_APIC: - /* - * don't know how to accurately check for this. - * just assume it was a watchdog timer interrupt - * This matches the old behaviour. - */ - rc = 1; - break; - } - return rc; -} - -#ifdef CONFIG_SYSCTL - -static void enable_ioapic_nmi_watchdog_single(void *unused) -{ - __get_cpu_var(wd_enabled) = 1; - atomic_inc(&nmi_active); - __acpi_nmi_enable(NULL); -} - -static void enable_ioapic_nmi_watchdog(void) -{ - on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1); - touch_nmi_watchdog(); -} - -static void disable_ioapic_nmi_watchdog(void) -{ - on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); -} - -static int __init setup_unknown_nmi_panic(char *str) -{ - unknown_nmi_panic = 1; - return 1; -} -__setup("unknown_nmi_panic", setup_unknown_nmi_panic); - -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) -{ - unsigned char reason = get_nmi_reason(); - char buf[64]; - - sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(buf, regs, 1); /* Always panic here */ - return 0; -} - -/* - * proc handler for /proc/sys/kernel/nmi - */ -int proc_nmi_enabled(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) -{ - int old_state; - - nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 
1 : 0; - old_state = nmi_watchdog_enabled; - proc_dointvec(table, write, buffer, length, ppos); - if (!!old_state == !!nmi_watchdog_enabled) - return 0; - - if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) { - printk(KERN_WARNING - "NMI watchdog is permanently disabled\n"); - return -EIO; - } - - if (nmi_watchdog == NMI_LOCAL_APIC) { - if (nmi_watchdog_enabled) - enable_lapic_nmi_watchdog(); - else - disable_lapic_nmi_watchdog(); - } else if (nmi_watchdog == NMI_IO_APIC) { - if (nmi_watchdog_enabled) - enable_ioapic_nmi_watchdog(); - else - disable_ioapic_nmi_watchdog(); - } else { - printk(KERN_WARNING - "NMI watchdog doesn't know what hardware to touch\n"); - return -EIO; - } - return 0; -} - -#endif /* CONFIG_SYSCTL */ - -int do_nmi_callback(struct pt_regs *regs, int cpu) -{ -#ifdef CONFIG_SYSCTL - if (unknown_nmi_panic) - return unknown_nmi_panic_callback(regs, cpu); -#endif - return 0; -} - -void arch_trigger_all_cpu_backtrace(void) -{ - int i; - - cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); - - printk(KERN_INFO "sending NMI to all CPUs:\n"); - apic->send_IPI_all(NMI_VECTOR); - - /* Wait for up to 10 seconds for all CPUs to do the backtrace */ - for (i = 0; i < 10 * 1000; i++) { - if (cpumask_empty(to_cpumask(backtrace_mask))) - break; - mdelay(1); - } -} diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index f9e4e6a54073..d8c4a6feb286 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -79,13 +79,6 @@ void __init default_setup_apic_routing(void) /* need to update phys_pkg_id */ apic->phys_pkg_id = apicid_phys_pkg_id; } - - /* - * Now that apic routing model is selected, configure the - * fault handling for intr remapping. - */ - if (intr_remapping_enabled) - enable_drhd_fault_handling(); } /* Same for both flat and physical. */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4b68bda30938..1d59834396bd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -894,7 +894,6 @@ void __init identify_boot_cpu(void) #else vgetcpu_set_mode(); #endif - init_hw_perf_events(); } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 6d75b9145b13..0a360d146596 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -330,9 +330,6 @@ static bool reserve_pmc_hardware(void) { int i; - if (nmi_watchdog == NMI_LOCAL_APIC) - disable_lapic_nmi_watchdog(); - for (i = 0; i < x86_pmu.num_counters; i++) { if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) goto perfctr_fail; @@ -355,9 +352,6 @@ perfctr_fail: for (i--; i >= 0; i--) release_perfctr_nmi(x86_pmu.perfctr + i); - if (nmi_watchdog == NMI_LOCAL_APIC) - enable_lapic_nmi_watchdog(); - return false; } @@ -369,9 +363,6 @@ static void release_pmc_hardware(void) release_perfctr_nmi(x86_pmu.perfctr + i); release_evntsel_nmi(x86_pmu.eventsel + i); } - - if (nmi_watchdog == NMI_LOCAL_APIC) - enable_lapic_nmi_watchdog(); } #else @@ -384,15 +375,53 @@ static void release_pmc_hardware(void) {} static bool check_hw_exists(void) { u64 val, val_new = 0; - int ret = 0; + int i, reg, ret = 0; + + /* + * Check to see if the BIOS enabled any of the counters, if so + * complain and bail. 
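/* (Aside: the probe below is two-stage - first scan the event-select
 *  and fixed-counter MSRs for enable bits a BIOS left behind, then do
 *  a write/read-back test, since emulators such as qemu/kvm may
 *  silently swallow PMU MSR writes and read back zero. Both failure
 *  modes fall through to "software events only".) */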
+ */ + for (i = 0; i < x86_pmu.num_counters; i++) { + reg = x86_pmu.eventsel + i; + ret = rdmsrl_safe(reg, &val); + if (ret) + goto msr_fail; + if (val & ARCH_PERFMON_EVENTSEL_ENABLE) + goto bios_fail; + } + if (x86_pmu.num_counters_fixed) { + reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; + ret = rdmsrl_safe(reg, &val); + if (ret) + goto msr_fail; + for (i = 0; i < x86_pmu.num_counters_fixed; i++) { + if (val & (0x03 << i*4)) + goto bios_fail; + } + } + + /* + * Now write a value and read it back to see if it matches, + * this is needed to detect certain hardware emulators (qemu/kvm) + * that don't trap on the MSR access and always return 0s. + */ val = 0xabcdUL; - ret |= checking_wrmsrl(x86_pmu.perfctr, val); + ret = checking_wrmsrl(x86_pmu.perfctr, val); ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); if (ret || val != val_new) - return false; + goto msr_fail; return true; + +bios_fail: + printk(KERN_CONT "Broken BIOS detected, using software events only.\n"); + printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val); + return false; + +msr_fail: + printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); + return false; } static void reserve_ds_buffers(void); @@ -451,7 +480,7 @@ static int x86_setup_perfctr(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; u64 config; - if (!hwc->sample_period) { + if (!is_sampling_event(event)) { hwc->sample_period = x86_pmu.max_period; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); @@ -1362,7 +1391,7 @@ static void __init pmu_check_apic(void) pr_info("no hardware sampling interrupt available.\n"); } -void __init init_hw_perf_events(void) +int __init init_hw_perf_events(void) { struct event_constraint *c; int err; @@ -1377,20 +1406,18 @@ void __init init_hw_perf_events(void) err = amd_pmu_init(); break; default: - return; + return 0; } if (err != 0) { pr_cont("no PMU driver, software events only.\n"); - return; + return 0; } pmu_check_apic(); /* sanity check that the hardware exists or is emulated */ - if (!check_hw_exists()) { - pr_cont("Broken PMU hardware detected, software events only.\n"); - return; - } + if (!check_hw_exists()) + return 0; pr_cont("%s PMU driver.\n", x86_pmu.name); @@ -1438,9 +1465,12 @@ void __init init_hw_perf_events(void) pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); pr_info("... 
event mask: %016Lx\n", x86_pmu.intel_ctrl); - perf_pmu_register(&pmu); + perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); perf_cpu_notifier(x86_pmu_notifier); + + return 0; } +early_initcall(init_hw_perf_events); static inline void x86_pmu_read(struct perf_event *event) { @@ -1686,7 +1716,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) perf_callchain_store(entry, regs->ip); - dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); + dump_trace(NULL, regs, NULL, &backtrace_ops, entry); } #ifdef CONFIG_COMPAT diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index e421b8cd6944..67e2202a6039 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -1,7 +1,5 @@ #ifdef CONFIG_CPU_SUP_AMD -static DEFINE_RAW_SPINLOCK(amd_nb_lock); - static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -275,7 +273,7 @@ done: return &emptyconstraint; } -static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) +static struct amd_nb *amd_alloc_nb(int cpu) { struct amd_nb *nb; int i; @@ -285,7 +283,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) if (!nb) return NULL; - nb->nb_id = nb_id; + nb->nb_id = -1; /* * initialize all possible NB constraints @@ -306,7 +304,7 @@ static int amd_pmu_cpu_prepare(int cpu) if (boot_cpu_data.x86_max_cores < 2) return NOTIFY_OK; - cpuc->amd_nb = amd_alloc_nb(cpu, -1); + cpuc->amd_nb = amd_alloc_nb(cpu); if (!cpuc->amd_nb) return NOTIFY_BAD; @@ -325,8 +323,6 @@ static void amd_pmu_cpu_starting(int cpu) nb_id = amd_get_nb_id(cpu); WARN_ON_ONCE(nb_id == BAD_APICID); - raw_spin_lock(&amd_nb_lock); - for_each_online_cpu(i) { nb = per_cpu(cpu_hw_events, i).amd_nb; if (WARN_ON_ONCE(!nb)) @@ -341,8 +337,6 @@ static void amd_pmu_cpu_starting(int cpu) cpuc->amd_nb->nb_id = nb_id; cpuc->amd_nb->refcnt++; - - raw_spin_unlock(&amd_nb_lock); } static void amd_pmu_cpu_dead(int cpu) @@ -354,8 +348,6 @@ static void amd_pmu_cpu_dead(int cpu) cpuhw = &per_cpu(cpu_hw_events, cpu); - raw_spin_lock(&amd_nb_lock); - if (cpuhw->amd_nb) { struct amd_nb *nb = cpuhw->amd_nb; @@ -364,8 +356,6 @@ static void amd_pmu_cpu_dead(int cpu) cpuhw->amd_nb = NULL; } - - raw_spin_unlock(&amd_nb_lock); } static __initconst const struct x86_pmu amd_pmu = { diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index c8f5c088cad1..24e390e40f2e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -816,6 +816,32 @@ static int intel_pmu_hw_config(struct perf_event *event) if (ret) return ret; + if (event->attr.precise_ip && + (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { + /* + * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P + * (0x003c) so that we can use it with PEBS. + * + * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't + * PEBS capable. However we can use INST_RETIRED.ANY_P + * (0x00c0), which is a PEBS capable event, to get the same + * count. + * + * INST_RETIRED.ANY_P counts the number of cycles that retire + * CNTMASK instructions. By setting CNTMASK to a value (16) + * larger than the maximum number of instructions that can be + * retired per cycle (4) and then inverting the condition, we + * count all cycles that retire 16 or fewer instructions, which + * is every cycle. + * + * Thereby we gain a PEBS capable cycle counter.
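/* (Checking the arithmetic on the 0x108000c0 constant below against
 *  the x86 raw-event layout - event [7:0], umask [15:8], inv bit 23,
 *  cmask [31:24]:
 *
 *	0x000000c0  event 0xc0	INST_RETIRED.ANY_P
 *	0x00800000  inv		invert the cmask comparison
 *	0x10000000  cmask 16	threshold of 16 instructions per cycle
 *	----------
 *	0x108000c0  "TOTAL_CYCLES": cycles retiring fewer than 16
 *		    instructions, i.e. every cycle.) */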
+ */ + u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */ + + alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); + event->hw.config = alt_config; + } + if (event->attr.type != PERF_TYPE_RAW) return 0; diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index d9f4ff8fcd69..d5a236615501 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -16,32 +16,12 @@ #include <linux/kernel.h> #include <linux/bitops.h> #include <linux/smp.h> -#include <linux/nmi.h> +#include <asm/nmi.h> #include <linux/kprobes.h> #include <asm/apic.h> #include <asm/perf_event.h> -struct nmi_watchdog_ctlblk { - unsigned int cccr_msr; - unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ - unsigned int evntsel_msr; /* the MSR to select the events to handle */ -}; - -/* Interface defining a CPU specific perfctr watchdog */ -struct wd_ops { - int (*reserve)(void); - void (*unreserve)(void); - int (*setup)(unsigned nmi_hz); - void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); - void (*stop)(void); - unsigned perfctr; - unsigned evntsel; - u64 checkbit; -}; - -static const struct wd_ops *wd_ops; - /* * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's * offset from MSR_P4_BSU_ESCR0. @@ -60,8 +40,6 @@ static const struct wd_ops *wd_ops; static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); -static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); - /* converts an msr to an appropriate reservation bit */ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) { @@ -172,623 +150,3 @@ void release_evntsel_nmi(unsigned int msr) clear_bit(counter, evntsel_nmi_owner); } EXPORT_SYMBOL(release_evntsel_nmi); - -void disable_lapic_nmi_watchdog(void) -{ - BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); - - if (atomic_read(&nmi_active) <= 0) - return; - - on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); - - if (wd_ops) - wd_ops->unreserve(); - - BUG_ON(atomic_read(&nmi_active) != 0); -} - -void enable_lapic_nmi_watchdog(void) -{ - BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); - - /* are we already enabled */ - if (atomic_read(&nmi_active) != 0) - return; - - /* are we lapic aware */ - if (!wd_ops) - return; - if (!wd_ops->reserve()) { - printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n"); - return; - } - - on_each_cpu(setup_apic_nmi_watchdog, NULL, 1); - touch_nmi_watchdog(); -} - -/* - * Activate the NMI watchdog via the local APIC. - */ - -static unsigned int adjust_for_32bit_ctr(unsigned int hz) -{ - u64 counter_val; - unsigned int retval = hz; - - /* - * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter - * are writable, with higher bits sign extending from bit 31. - * So, we can only program the counter with 31 bit values and - * 32nd bit should be 1, for 33.. to be 1. 
- * Find the appropriate nmi_hz - */ - counter_val = (u64)cpu_khz * 1000; - do_div(counter_val, retval); - if (counter_val > 0x7fffffffULL) { - u64 count = (u64)cpu_khz * 1000; - do_div(count, 0x7fffffffUL); - retval = count + 1; - } - return retval; -} - -static void write_watchdog_counter(unsigned int perfctr_msr, - const char *descr, unsigned nmi_hz) -{ - u64 count = (u64)cpu_khz * 1000; - - do_div(count, nmi_hz); - if (descr) - pr_debug("setting %s to -0x%08Lx\n", descr, count); - wrmsrl(perfctr_msr, 0 - count); -} - -static void write_watchdog_counter32(unsigned int perfctr_msr, - const char *descr, unsigned nmi_hz) -{ - u64 count = (u64)cpu_khz * 1000; - - do_div(count, nmi_hz); - if (descr) - pr_debug("setting %s to -0x%08Lx\n", descr, count); - wrmsr(perfctr_msr, (u32)(-count), 0); -} - -/* - * AMD K7/K8/Family10h/Family11h support. - * AMD keeps this interface nicely stable so there is not much variety - */ -#define K7_EVNTSEL_ENABLE (1 << 22) -#define K7_EVNTSEL_INT (1 << 20) -#define K7_EVNTSEL_OS (1 << 17) -#define K7_EVNTSEL_USR (1 << 16) -#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 -#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING - -static int setup_k7_watchdog(unsigned nmi_hz) -{ - unsigned int perfctr_msr, evntsel_msr; - unsigned int evntsel; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - perfctr_msr = wd_ops->perfctr; - evntsel_msr = wd_ops->evntsel; - - wrmsrl(perfctr_msr, 0UL); - - evntsel = K7_EVNTSEL_INT - | K7_EVNTSEL_OS - | K7_EVNTSEL_USR - | K7_NMI_EVENT; - - /* setup the timer */ - wrmsr(evntsel_msr, evntsel, 0); - write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz); - - /* initialize the wd struct before enabling */ - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; /* unused */ - - /* ok, everything is initialized, announce that we're set */ - cpu_nmi_set_wd_enabled(); - - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= K7_EVNTSEL_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); - - return 1; -} - -static void single_msr_stop_watchdog(void) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - wrmsr(wd->evntsel_msr, 0, 0); -} - -static int single_msr_reserve(void) -{ - if (!reserve_perfctr_nmi(wd_ops->perfctr)) - return 0; - - if (!reserve_evntsel_nmi(wd_ops->evntsel)) { - release_perfctr_nmi(wd_ops->perfctr); - return 0; - } - return 1; -} - -static void single_msr_unreserve(void) -{ - release_evntsel_nmi(wd_ops->evntsel); - release_perfctr_nmi(wd_ops->perfctr); -} - -static void __kprobes -single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) -{ - /* start the cycle over again */ - write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); -} - -static const struct wd_ops k7_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_k7_watchdog, - .rearm = single_msr_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_K7_PERFCTR0, - .evntsel = MSR_K7_EVNTSEL0, - .checkbit = 1ULL << 47, -}; - -/* - * Intel Model 6 (PPro+,P2,P3,P-M,Core1) - */ -#define P6_EVNTSEL0_ENABLE (1 << 22) -#define P6_EVNTSEL_INT (1 << 20) -#define P6_EVNTSEL_OS (1 << 17) -#define P6_EVNTSEL_USR (1 << 16) -#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 -#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED - -static int setup_p6_watchdog(unsigned nmi_hz) -{ - unsigned int perfctr_msr, evntsel_msr; - unsigned int evntsel; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - perfctr_msr = wd_ops->perfctr; - 
evntsel_msr = wd_ops->evntsel; - - /* KVM doesn't implement this MSR */ - if (wrmsr_safe(perfctr_msr, 0, 0) < 0) - return 0; - - evntsel = P6_EVNTSEL_INT - | P6_EVNTSEL_OS - | P6_EVNTSEL_USR - | P6_NMI_EVENT; - - /* setup the timer */ - wrmsr(evntsel_msr, evntsel, 0); - nmi_hz = adjust_for_32bit_ctr(nmi_hz); - write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz); - - /* initialize the wd struct before enabling */ - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; /* unused */ - - /* ok, everything is initialized, announce that we're set */ - cpu_nmi_set_wd_enabled(); - - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= P6_EVNTSEL0_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); - - return 1; -} - -static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) -{ - /* - * P6 based Pentium M need to re-unmask - * the apic vector but it doesn't hurt - * other P6 variant. - * ArchPerfom/Core Duo also needs this - */ - apic_write(APIC_LVTPC, APIC_DM_NMI); - - /* P6/ARCH_PERFMON has 32 bit counter write */ - write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz); -} - -static const struct wd_ops p6_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_p6_watchdog, - .rearm = p6_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_P6_PERFCTR0, - .evntsel = MSR_P6_EVNTSEL0, - .checkbit = 1ULL << 39, -}; - -/* - * Intel P4 performance counters. - * By far the most complicated of all. - */ -#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7) -#define P4_ESCR_EVENT_SELECT(N) ((N) << 25) -#define P4_ESCR_OS (1 << 3) -#define P4_ESCR_USR (1 << 2) -#define P4_CCCR_OVF_PMI0 (1 << 26) -#define P4_CCCR_OVF_PMI1 (1 << 27) -#define P4_CCCR_THRESHOLD(N) ((N) << 20) -#define P4_CCCR_COMPLEMENT (1 << 19) -#define P4_CCCR_COMPARE (1 << 18) -#define P4_CCCR_REQUIRED (3 << 16) -#define P4_CCCR_ESCR_SELECT(N) ((N) << 13) -#define P4_CCCR_ENABLE (1 << 12) -#define P4_CCCR_OVF (1 << 31) - -#define P4_CONTROLS 18 -static unsigned int p4_controls[18] = { - MSR_P4_BPU_CCCR0, - MSR_P4_BPU_CCCR1, - MSR_P4_BPU_CCCR2, - MSR_P4_BPU_CCCR3, - MSR_P4_MS_CCCR0, - MSR_P4_MS_CCCR1, - MSR_P4_MS_CCCR2, - MSR_P4_MS_CCCR3, - MSR_P4_FLAME_CCCR0, - MSR_P4_FLAME_CCCR1, - MSR_P4_FLAME_CCCR2, - MSR_P4_FLAME_CCCR3, - MSR_P4_IQ_CCCR0, - MSR_P4_IQ_CCCR1, - MSR_P4_IQ_CCCR2, - MSR_P4_IQ_CCCR3, - MSR_P4_IQ_CCCR4, - MSR_P4_IQ_CCCR5, -}; -/* - * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter - * CRU_ESCR0 (with any non-null event selector) through a complemented - * max threshold. 
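All of the removed setup paths program the counter through write_watchdog_counter()/write_watchdog_counter32() with a negated count. A small sketch of why, using made-up clock numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t cpu_khz = 2000000;	/* assumed 2 GHz clock */
	unsigned int nmi_hz = 10;	/* aim for ~10 NMIs per second */
	uint64_t count = cpu_khz * 1000 / nmi_hz;
	uint64_t start = 0 - count;	/* the value wrmsrl() would write */

	/* Perfctrs count up and interrupt on overflow, so start + count
	 * wraps to 0: the NMI fires after exactly `count` cycles. */
	printf("count=%llu start=0x%016llx\n",
	       (unsigned long long)count, (unsigned long long)start);
	return 0;
}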
[IA32-Vol3, Section 14.9.9] - */ -static int setup_p4_watchdog(unsigned nmi_hz) -{ - unsigned int perfctr_msr, evntsel_msr, cccr_msr; - unsigned int evntsel, cccr_val; - unsigned int misc_enable, dummy; - unsigned int ht_num; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); - if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) - return 0; - -#ifdef CONFIG_SMP - /* detect which hyperthread we are on */ - if (smp_num_siblings == 2) { - unsigned int ebx, apicid; - - ebx = cpuid_ebx(1); - apicid = (ebx >> 24) & 0xff; - ht_num = apicid & 1; - } else -#endif - ht_num = 0; - - /* - * performance counters are shared resources - * assign each hyperthread its own set - * (re-use the ESCR0 register, seems safe - * and keeps the cccr_val the same) - */ - if (!ht_num) { - /* logical cpu 0 */ - perfctr_msr = MSR_P4_IQ_PERFCTR0; - evntsel_msr = MSR_P4_CRU_ESCR0; - cccr_msr = MSR_P4_IQ_CCCR0; - cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); - - /* - * If we're on the kdump kernel or other situation, we may - * still have other performance counter registers set to - * interrupt and they'll keep interrupting forever because - * of the P4_CCCR_OVF quirk. So we need to ACK all the - * pending interrupts and disable all the registers here, - * before reenabling the NMI delivery. Refer to p4_rearm() - * about the P4_CCCR_OVF quirk. - */ - if (reset_devices) { - unsigned int low, high; - int i; - - for (i = 0; i < P4_CONTROLS; i++) { - rdmsr(p4_controls[i], low, high); - low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF); - wrmsr(p4_controls[i], low, high); - } - } - } else { - /* logical cpu 1 */ - perfctr_msr = MSR_P4_IQ_PERFCTR1; - evntsel_msr = MSR_P4_CRU_ESCR0; - cccr_msr = MSR_P4_IQ_CCCR1; - - /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */ - if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4) - cccr_val = P4_CCCR_OVF_PMI0; - else - cccr_val = P4_CCCR_OVF_PMI1; - cccr_val |= P4_CCCR_ESCR_SELECT(4); - } - - evntsel = P4_ESCR_EVENT_SELECT(0x3F) - | P4_ESCR_OS - | P4_ESCR_USR; - - cccr_val |= P4_CCCR_THRESHOLD(15) - | P4_CCCR_COMPLEMENT - | P4_CCCR_COMPARE - | P4_CCCR_REQUIRED; - - wrmsr(evntsel_msr, evntsel, 0); - wrmsr(cccr_msr, cccr_val, 0); - write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); - - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = cccr_msr; - - /* ok, everything is initialized, announce that we're set */ - cpu_nmi_set_wd_enabled(); - - apic_write(APIC_LVTPC, APIC_DM_NMI); - cccr_val |= P4_CCCR_ENABLE; - wrmsr(cccr_msr, cccr_val, 0); - return 1; -} - -static void stop_p4_watchdog(void) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - wrmsr(wd->cccr_msr, 0, 0); - wrmsr(wd->evntsel_msr, 0, 0); -} - -static int p4_reserve(void) -{ - if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0)) - return 0; -#ifdef CONFIG_SMP - if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1)) - goto fail1; -#endif - if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) - goto fail2; - /* RED-PEN why is ESCR1 not reserved here? 
*/ - return 1; - fail2: -#ifdef CONFIG_SMP - if (smp_num_siblings > 1) - release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); - fail1: -#endif - release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); - return 0; -} - -static void p4_unreserve(void) -{ -#ifdef CONFIG_SMP - if (smp_num_siblings > 1) - release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); -#endif - release_evntsel_nmi(MSR_P4_CRU_ESCR0); - release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); -} - -static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) -{ - unsigned dummy; - /* - * P4 quirks: - * - An overflown perfctr will assert its interrupt - * until the OVF flag in its CCCR is cleared. - * - LVTPC is masked on interrupt and must be - * unmasked by the LVTPC handler. - */ - rdmsrl(wd->cccr_msr, dummy); - dummy &= ~P4_CCCR_OVF; - wrmsrl(wd->cccr_msr, dummy); - apic_write(APIC_LVTPC, APIC_DM_NMI); - /* start the cycle over again */ - write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); -} - -static const struct wd_ops p4_wd_ops = { - .reserve = p4_reserve, - .unreserve = p4_unreserve, - .setup = setup_p4_watchdog, - .rearm = p4_rearm, - .stop = stop_p4_watchdog, - /* RED-PEN this is wrong for the other sibling */ - .perfctr = MSR_P4_BPU_PERFCTR0, - .evntsel = MSR_P4_BSU_ESCR0, - .checkbit = 1ULL << 39, -}; - -/* - * Watchdog using the Intel architected PerfMon. - * Used for Core2 and hopefully all future Intel CPUs. - */ -#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL -#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK - -static struct wd_ops intel_arch_wd_ops; - -static int setup_intel_arch_watchdog(unsigned nmi_hz) -{ - unsigned int ebx; - union cpuid10_eax eax; - unsigned int unused; - unsigned int perfctr_msr, evntsel_msr; - unsigned int evntsel; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - /* - * Check whether the Architectural PerfMon supports - * Unhalted Core Cycles Event or not. - * NOTE: Corresponding bit = 0 in ebx indicates event present. 
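The comment above describes the CPUID leaf 0xA probe that the removed setup_intel_arch_watchdog() performs next. A hedged user-space equivalent, assuming GCC or Clang on x86 (the kernel uses its own cpuid() helper and union cpuid10_eax instead):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x0a, &eax, &ebx, &ecx, &edx))
		return 1;	/* leaf 0xA not supported */

	unsigned int mask_length = (eax >> 24) & 0xff;	/* eax bits 31:24 */

	/* Bit 0 of ebx is *set* when the event is unavailable. */
	printf("unhalted core cycles event: %s\n",
	       (mask_length >= 1 && !(ebx & 1)) ? "present" : "absent");
	return 0;
}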
- */ - cpuid(10, &(eax.full), &ebx, &unused, &unused); - if ((eax.split.mask_length < - (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || - (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) - return 0; - - perfctr_msr = wd_ops->perfctr; - evntsel_msr = wd_ops->evntsel; - - wrmsrl(perfctr_msr, 0UL); - - evntsel = ARCH_PERFMON_EVENTSEL_INT - | ARCH_PERFMON_EVENTSEL_OS - | ARCH_PERFMON_EVENTSEL_USR - | ARCH_PERFMON_NMI_EVENT_SEL - | ARCH_PERFMON_NMI_EVENT_UMASK; - - /* setup the timer */ - wrmsr(evntsel_msr, evntsel, 0); - nmi_hz = adjust_for_32bit_ctr(nmi_hz); - write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); - - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; /* unused */ - - /* ok, everything is initialized, announce that we're set */ - cpu_nmi_set_wd_enabled(); - - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); - intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); - return 1; -} - -static struct wd_ops intel_arch_wd_ops __read_mostly = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_intel_arch_watchdog, - .rearm = p6_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_ARCH_PERFMON_PERFCTR1, - .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, -}; - -static void probe_nmi_watchdog(void) -{ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if (boot_cpu_data.x86 == 6 || - (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15)) - wd_ops = &k7_wd_ops; - return; - case X86_VENDOR_INTEL: - /* Work around where perfctr1 doesn't have a working enable - * bit as described in the following errata: - * AE49 Core Duo and Intel Core Solo 65 nm - * AN49 Intel Pentium Dual-Core - * AF49 Dual-Core Intel Xeon Processor LV - */ - if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) || - ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 && - boot_cpu_data.x86_mask == 4))) { - intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; - intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; - } - if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { - wd_ops = &intel_arch_wd_ops; - break; - } - switch (boot_cpu_data.x86) { - case 6: - if (boot_cpu_data.x86_model > 13) - return; - - wd_ops = &p6_wd_ops; - break; - case 15: - wd_ops = &p4_wd_ops; - break; - default: - return; - } - break; - } -} - -/* Interface to nmi.c */ - -int lapic_watchdog_init(unsigned nmi_hz) -{ - if (!wd_ops) { - probe_nmi_watchdog(); - if (!wd_ops) { - printk(KERN_INFO "NMI watchdog: CPU not supported\n"); - return -1; - } - - if (!wd_ops->reserve()) { - printk(KERN_ERR - "NMI watchdog: cannot reserve perfctrs\n"); - return -1; - } - } - - if (!(wd_ops->setup(nmi_hz))) { - printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n", - raw_smp_processor_id()); - return -1; - } - - return 0; -} - -void lapic_watchdog_stop(void) -{ - if (wd_ops) - wd_ops->stop(); -} - -unsigned lapic_adjust_nmi_hz(unsigned hz) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - if (wd->perfctr_msr == MSR_P6_PERFCTR0 || - wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1) - hz = adjust_for_32bit_ctr(hz); - return hz; -} - -int __kprobes lapic_wd_event(unsigned nmi_hz) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - u64 ctr; - - rdmsrl(wd->perfctr_msr, ctr); - if (ctr & wd_ops->checkbit) /* perfctr still running? 
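The lapic_wd_event() test above works because the counter was armed with a negative value: while it is still counting up toward overflow, the top implemented bit (wd_ops->checkbit, bit 47 on K7, bit 39 on P6/P4) stays set, and a clear bit means it already wrapped and fired. A sketch with illustrative values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t checkbit = 1ULL << 47;		/* K7: 48-bit counter */
	uint64_t width_mask = (1ULL << 48) - 1;
	uint64_t armed = (0 - 100000ULL) & width_mask;	/* counting up */
	uint64_t fired = 42;			/* wrapped past overflow */

	printf("armed: %s\n", (armed & checkbit) ? "running" : "overflowed");
	printf("fired: %s\n", (fired & checkbit) ? "running" : "overflowed");
	return 0;
}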
*/ - return 0; - - wd_ops->rearm(wd, nmi_hz); - return 1; -} diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6e8752c1bd52..8474c998cbd4 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = { void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl) + unsigned long *stack, char *log_lvl) { printk("%sCall Trace:\n", log_lvl); - dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); + dump_trace(task, regs, stack, &print_trace_ops, log_lvl); } void show_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp) + unsigned long *stack) { - show_trace_log_lvl(task, regs, stack, bp, ""); + show_trace_log_lvl(task, regs, stack, ""); } void show_stack(struct task_struct *task, unsigned long *sp) { - show_stack_log_lvl(task, NULL, sp, 0, ""); + show_stack_log_lvl(task, NULL, sp, ""); } /* @@ -210,7 +210,7 @@ void dump_stack(void) init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); - show_trace(NULL, NULL, &stack, bp); + show_trace(NULL, NULL, &stack); } EXPORT_SYMBOL(dump_stack); diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 1bc7f75a5bda..74cc1eda384b 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -17,11 +17,12 @@ #include <asm/stacktrace.h> -void dump_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, +void dump_trace(struct task_struct *task, + struct pt_regs *regs, unsigned long *stack, const struct stacktrace_ops *ops, void *data) { int graph = 0; + unsigned long bp; if (!task) task = current; @@ -34,18 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, stack = (unsigned long *)task->thread.sp; } -#ifdef CONFIG_FRAME_POINTER - if (!bp) { - if (task == current) { - /* Grab bp right from our regs */ - get_bp(bp); - } else { - /* bp is the last reg pushed by switch_to */ - bp = *(unsigned long *) task->thread.sp; - } - } -#endif - + bp = stack_frame(task, regs); for (;;) { struct thread_info *context; @@ -65,7 +55,7 @@ EXPORT_SYMBOL(dump_trace); void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) + unsigned long *sp, char *log_lvl) { unsigned long *stack; int i; @@ -87,7 +77,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, touch_nmi_watchdog(); } printk(KERN_CONT "\n"); - show_trace_log_lvl(task, regs, sp, bp, log_lvl); + show_trace_log_lvl(task, regs, sp, log_lvl); } @@ -112,8 +102,7 @@ void show_registers(struct pt_regs *regs) u8 *ip; printk(KERN_EMERG "Stack:\n"); - show_stack_log_lvl(NULL, regs, ®s->sp, - 0, KERN_EMERG); + show_stack_log_lvl(NULL, regs, ®s->sp, KERN_EMERG); printk(KERN_EMERG "Code: "); diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 6a340485249a..64101335de19 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack, * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack */ -void dump_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, +void dump_trace(struct task_struct *task, + struct pt_regs *regs, unsigned long *stack, const struct stacktrace_ops *ops, void 
*data) { const unsigned cpu = get_cpu(); @@ -149,6 +149,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned used = 0; struct thread_info *tinfo; int graph = 0; + unsigned long bp; if (!task) task = current; @@ -160,18 +161,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, stack = (unsigned long *)task->thread.sp; } -#ifdef CONFIG_FRAME_POINTER - if (!bp) { - if (task == current) { - /* Grab bp right from our regs */ - get_bp(bp); - } else { - /* bp is the last reg pushed by switch_to */ - bp = *(unsigned long *) task->thread.sp; - } - } -#endif - + bp = stack_frame(task, regs); /* * Print function call entries in all stacks, starting at the * current stack address. If the stacks consist of nested @@ -235,7 +225,7 @@ EXPORT_SYMBOL(dump_trace); void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) + unsigned long *sp, char *log_lvl) { unsigned long *irq_stack_end; unsigned long *irq_stack; @@ -279,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, preempt_enable(); printk(KERN_CONT "\n"); - show_trace_log_lvl(task, regs, sp, bp, log_lvl); + show_trace_log_lvl(task, regs, sp, log_lvl); } void show_registers(struct pt_regs *regs) @@ -308,7 +298,7 @@ void show_registers(struct pt_regs *regs) printk(KERN_EMERG "Stack:\n"); show_stack_log_lvl(NULL, regs, (unsigned long *)sp, - regs->bp, KERN_EMERG); + KERN_EMERG); printk(KERN_EMERG "Code: "); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index bcece91dd311..c0dbd9ac24f0 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -60,16 +60,18 @@ #define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) #endif +/* Number of possible pages in the lowmem region */ +LOWMEM_PAGES = (((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) + /* Enough space to fit pagetables for the low memory linear map */ -MAPPING_BEYOND_END = \ - PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT +MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT /* * Worst-case size of the kernel mapping we need to make: - * the worst-case size of the kernel itself, plus the extra we need - * to map for the linear map. + * a relocatable kernel can live anywhere in lowmem, so we need to be able + * to map all of lowmem. */ -KERNEL_PAGES = (KERNEL_IMAGE_SIZE + MAPPING_BEYOND_END)>>PAGE_SHIFT +KERNEL_PAGES = LOWMEM_PAGES INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm RESERVE_BRK(pagetables, INIT_MAP_SIZE) @@ -620,13 +622,13 @@ ENTRY(initial_code) __PAGE_ALIGNED_BSS .align PAGE_SIZE_asm #ifdef CONFIG_X86_PAE -initial_pg_pmd: +ENTRY(initial_pg_pmd) .fill 1024*KPMDS,4,0 #else ENTRY(initial_page_table) .fill 1024,4,0 #endif -initial_pg_fixmap: +ENTRY(initial_pg_fixmap) .fill 1024,4,0 ENTRY(empty_zero_page) .fill 4096,1,0 diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ae03cab4352e..4ff5968f12d2 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -27,6 +27,9 @@ #define HPET_DEV_FSB_CAP 0x1000 #define HPET_DEV_PERI_CAP 0x2000 +#define HPET_MIN_CYCLES 128 +#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) + #define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt) /* @@ -299,8 +302,9 @@ static void hpet_legacy_clockevent_register(void) /* Calculate the min / max delta */ hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, &hpet_clockevent); - /* 5 usec minimum reprogramming delta. 
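For the head_32.S hunk above, the new LOWMEM_PAGES constant sizes the boot page tables for all of lowmem, since a relocatable kernel can live anywhere in it. A sketch of the arithmetic under the common VMSPLIT_3G layout (assumed __PAGE_OFFSET of 0xC0000000, non-PAE):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_OFFSET	0xC0000000ULL	/* assumed VMSPLIT_3G */
#define PTRS_PER_PGD	1024		/* non-PAE: 1024 PTEs per table */

int main(void)
{
	unsigned long long lowmem_pages =
		((1ULL << 32) - PAGE_OFFSET) >> PAGE_SHIFT;
	unsigned long long pt_pages = lowmem_pages / PTRS_PER_PGD;

	printf("lowmem pages: %llu, pagetable pages: %llu (%llu KiB)\n",
	       lowmem_pages, pt_pages, pt_pages * 4);
	return 0;
}

256 page-table pages (1 MiB of brk space) cover the full 1 GiB linear map, which is the worst case the new KERNEL_PAGES bound has to allow for.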
*/ - hpet_clockevent.min_delta_ns = 5000; + /* Setup minimum reprogramming delta. */ + hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA, + &hpet_clockevent); /* * Start hpet with the boot cpu mask and make it @@ -393,22 +397,24 @@ static int hpet_next_event(unsigned long delta, * the wraparound into account) nor a simple count down event * mode. Further the write to the comparator register is * delayed internally up to two HPET clock cycles in certain - * chipsets (ATI, ICH9,10). We worked around that by reading - * back the compare register, but that required another - * workaround for ICH9,10 chips where the first readout after - * write can return the old stale value. We already have a - * minimum delta of 5us enforced, but a NMI or SMI hitting + * chipsets (ATI, ICH9,10). Some newer AMD chipsets have even + * longer delays. We worked around that by reading back the + * compare register, but that required another workaround for + * ICH9,10 chips where the first readout after write can + * return the old stale value. We already had a minimum + * programming delta of 5us enforced, but a NMI or SMI hitting * between the counter readout and the comparator write can * move us behind that point easily. Now instead of reading * the compare register back several times, we make the ETIME * decision based on the following: Return ETIME if the - * counter value after the write is less than 8 HPET cycles + * counter value after the write is less than HPET_MIN_CYCLES * away from the event or if the counter is already ahead of - * the event. + * the event. The minimum programming delta for the generic + * clockevents code is set to 1.5 * HPET_MIN_CYCLES. */ res = (s32)(cnt - hpet_readl(HPET_COUNTER)); - return res < 8 ? -ETIME : 0; + return res < HPET_MIN_CYCLES ? -ETIME : 0; } static void hpet_legacy_set_mode(enum clock_event_mode mode, diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 1cbd54c0df99..5940282bd2f9 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -1184,6 +1184,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + /* This is possible if op is under delayed unoptimizing */ + if (kprobe_disabled(&op->kp)) + return; + preempt_disable(); if (kprobe_running()) { kprobes_inc_nmissed_count(&op->kp); @@ -1401,10 +1405,16 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) return 0; } -/* Replace a breakpoint (int3) with a relative jump. */ -int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) +#define MAX_OPTIMIZE_PROBES 256 +static struct text_poke_param *jump_poke_params; +static struct jump_poke_buffer { + u8 buf[RELATIVEJUMP_SIZE]; +} *jump_poke_bufs; + +static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, + u8 *insn_buf, + struct optimized_kprobe *op) { - unsigned char jmp_code[RELATIVEJUMP_SIZE]; s32 rel = (s32)((long)op->optinsn.insn - ((long)op->kp.addr + RELATIVEJUMP_SIZE)); @@ -1412,16 +1422,79 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, RELATIVE_ADDR_SIZE); - jmp_code[0] = RELATIVEJUMP_OPCODE; - *(s32 *)(&jmp_code[1]) = rel; + insn_buf[0] = RELATIVEJUMP_OPCODE; + *(s32 *)(&insn_buf[1]) = rel; + + tprm->addr = op->kp.addr; + tprm->opcode = insn_buf; + tprm->len = RELATIVEJUMP_SIZE; +} + +/* + * Replace breakpoints (int3) with relative jumps. + * Caller must call with locking kprobe_mutex and text_mutex. 
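The hpet_next_event() comment above boils down to one signed 32-bit comparison. A self-contained sketch of that ETIME decision with hypothetical counter values:

#include <stdint.h>
#include <stdio.h>

#define HPET_MIN_CYCLES 128

static int would_etime(uint32_t cnt, uint32_t counter_now)
{
	/* Signed result of an unsigned subtraction handles wraparound. */
	int32_t res = (int32_t)(cnt - counter_now);

	return res < HPET_MIN_CYCLES;	/* too close, or already passed */
}

int main(void)
{
	printf("%d\n", would_etime(1000, 900));		/* 100 away -> 1 */
	printf("%d\n", would_etime(1000, 800));		/* 200 away -> 0 */
	printf("%d\n", would_etime(100, 0xFFFFFF00u));	/* wrap, 356 away -> 0 */
	return 0;
}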
+ */ +void __kprobes arch_optimize_kprobes(struct list_head *oplist) +{ + struct optimized_kprobe *op, *tmp; + int c = 0; + + list_for_each_entry_safe(op, tmp, oplist, list) { + WARN_ON(kprobe_disabled(&op->kp)); + /* Setup param */ + setup_optimize_kprobe(&jump_poke_params[c], + jump_poke_bufs[c].buf, op); + list_del_init(&op->list); + if (++c >= MAX_OPTIMIZE_PROBES) + break; + } /* * text_poke_smp doesn't support NMI/MCE code modifying. * However, since kprobes itself also doesn't support NMI/MCE * code probing, it's not a problem. */ - text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE); - return 0; + text_poke_smp_batch(jump_poke_params, c); +} + +static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, + u8 *insn_buf, + struct optimized_kprobe *op) +{ + /* Set int3 to first byte for kprobes */ + insn_buf[0] = BREAKPOINT_INSTRUCTION; + memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); + + tprm->addr = op->kp.addr; + tprm->opcode = insn_buf; + tprm->len = RELATIVEJUMP_SIZE; +} + +/* + * Recover original instructions and breakpoints from relative jumps. + * Caller must call with locking kprobe_mutex. + */ +extern void arch_unoptimize_kprobes(struct list_head *oplist, + struct list_head *done_list) +{ + struct optimized_kprobe *op, *tmp; + int c = 0; + + list_for_each_entry_safe(op, tmp, oplist, list) { + /* Setup param */ + setup_unoptimize_kprobe(&jump_poke_params[c], + jump_poke_bufs[c].buf, op); + list_move(&op->list, done_list); + if (++c >= MAX_OPTIMIZE_PROBES) + break; + } + + /* + * text_poke_smp doesn't support NMI/MCE code modifying. + * However, since kprobes itself also doesn't support NMI/MCE + * code probing, it's not a problem. + */ + text_poke_smp_batch(jump_poke_params, c); } /* Replace a relative jump with a breakpoint (int3). 
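The pattern in arch_optimize_kprobes()/arch_unoptimize_kprobes() above is plain batching: fill a parameter array from the list, then make one text_poke_smp_batch() call so the expensive stop_machine-style rendezvous runs once rather than once per probe. A schematic sketch whose names (poke_param, poke_batch, the addresses) are illustrative, not the kernel's:

#include <stdio.h>

#define MAX_BATCH 256

struct poke_param { void *addr; };	/* stand-in for text_poke_param */

static void poke_batch(struct poke_param *p, int n)
{
	/* In the kernel this is text_poke_smp_batch(): one rendezvous
	 * of all CPUs patches all n sites. */
	printf("patched %d sites in one rendezvous\n", n);
}

int main(void)
{
	void *sites[] = { (void *)0x1000, (void *)0x2000, (void *)0x3000 };
	struct poke_param params[MAX_BATCH];
	int c = 0;

	for (unsigned int i = 0; i < sizeof(sites) / sizeof(sites[0]); i++) {
		params[c].addr = sites[i];	/* the "setup param" step */
		if (++c >= MAX_BATCH)
			break;
	}
	poke_batch(params, c);
	return 0;
}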
*/ @@ -1453,11 +1526,35 @@ static int __kprobes setup_detour_execution(struct kprobe *p, } return 0; } + +static int __kprobes init_poke_params(void) +{ + /* Allocate code buffer and parameter array */ + jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * + MAX_OPTIMIZE_PROBES, GFP_KERNEL); + if (!jump_poke_bufs) + return -ENOMEM; + + jump_poke_params = kmalloc(sizeof(struct text_poke_param) * + MAX_OPTIMIZE_PROBES, GFP_KERNEL); + if (!jump_poke_params) { + kfree(jump_poke_bufs); + jump_poke_bufs = NULL; + return -ENOMEM; + } + + return 0; +} +#else /* !CONFIG_OPTPROBES */ +static int __kprobes init_poke_params(void) +{ + return 0; +} #endif int __init arch_init_kprobes(void) { - return 0; + return init_poke_params(); } int __kprobes arch_trampoline_kprobe(struct kprobe *p) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 57d1868a86aa..96ed1aac543a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -91,8 +91,7 @@ void exit_thread(void) void show_regs(struct pt_regs *regs) { show_registers(regs); - show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), - regs->bp); + show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs)); } void show_regs_common(void) diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c new file mode 100644 index 000000000000..2a26819bb6a8 --- /dev/null +++ b/arch/x86/kernel/resource.c @@ -0,0 +1,48 @@ +#include <linux/ioport.h> +#include <asm/e820.h> + +static void resource_clip(struct resource *res, resource_size_t start, + resource_size_t end) +{ + resource_size_t low = 0, high = 0; + + if (res->end < start || res->start > end) + return; /* no conflict */ + + if (res->start < start) + low = start - res->start; + + if (res->end > end) + high = res->end - end; + + /* Keep the area above or below the conflict, whichever is larger */ + if (low > high) + res->end = start - 1; + else + res->start = end + 1; +} + +static void remove_e820_regions(struct resource *avail) +{ + int i; + struct e820entry *entry; + + for (i = 0; i < e820.nr_map; i++) { + entry = &e820.map[i]; + + resource_clip(avail, entry->addr, + entry->addr + entry->size - 1); + } +} + +void arch_remove_reservations(struct resource *avail) +{ + /* Trim out BIOS areas (low 1MB and high 2MB) and E820 regions */ + if (avail->flags & IORESOURCE_MEM) { + if (avail->start < BIOS_END) + avail->start = BIOS_END; + resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END); + + remove_e820_regions(avail); + } +} diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 21c6746338af..85268f8eadf6 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -769,7 +769,6 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.arch_setup(); - resource_alloc_from_bottom = 0; iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; setup_memory_map(); parse_setup_data(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 083e99d1b7df..68f61ac632e1 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -281,6 +281,13 @@ static void __cpuinit smp_callin(void) */ smp_store_cpu_info(cpuid); + /* + * This must be done before setting cpu_online_mask + * or calling notify_cpu_starting. 
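resource_clip() in the new arch/x86/kernel/resource.c above keeps whichever side of a conflict is larger. A standalone sketch of the same rule, exercised with a hypothetical reserved range:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t resource_size_t;

struct res { resource_size_t start, end; };

static void resource_clip(struct res *r, resource_size_t start,
			  resource_size_t end)
{
	resource_size_t low = 0, high = 0;

	if (r->end < start || r->start > end)
		return;				/* no conflict */
	if (r->start < start)
		low = start - r->start;		/* room below the conflict */
	if (r->end > end)
		high = r->end - end;		/* room above the conflict */

	if (low > high)
		r->end = start - 1;		/* keep the area below */
	else
		r->start = end + 1;		/* keep the area above */
}

int main(void)
{
	/* Available 0x0-0xFFFFF clipped against a reserved region at
	 * 0x9F000-0xFFFFF keeps the larger low part: 0x0-0x9EFFF. */
	struct res avail = { 0x0, 0xFFFFF };

	resource_clip(&avail, 0x9F000, 0xFFFFF);
	printf("0x%llx-0x%llx\n", (unsigned long long)avail.start,
	       (unsigned long long)avail.end);
	return 0;
}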
+ */ + set_cpu_sibling_map(raw_smp_processor_id()); + wmb(); + notify_cpu_starting(cpuid); /* @@ -316,16 +323,6 @@ notrace static void __cpuinit start_secondary(void *unused) */ check_tsc_sync_target(); - if (nmi_watchdog == NMI_IO_APIC) { - legacy_pic->mask(0); - enable_NMI_through_LVT0(); - legacy_pic->unmask(0); - } - - /* This must be done before setting cpu_online_mask */ - set_cpu_sibling_map(raw_smp_processor_id()); - wmb(); - /* * We need to hold call_lock, so there is no inconsistency * between the time smp_call_function() determines number of @@ -1061,8 +1058,6 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_INFO "SMP mode deactivated.\n"); smpboot_clear_io_apic(); - localise_nmi_watchdog(); - connect_bsp_APIC(); setup_local_APIC(); end_local_APIC_setup(); @@ -1196,7 +1191,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) #ifdef CONFIG_X86_IO_APIC setup_ioapic_dest(); #endif - check_nmi_watchdog(); mtrr_aps_init(); } @@ -1341,8 +1335,6 @@ int native_cpu_disable(void) if (cpu == 0) return -EBUSY; - if (nmi_watchdog == NMI_LOCAL_APIC) - stop_apic_nmi_watchdog(NULL); clear_local_APIC(); cpu_disable_common(); diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index b53c525368a7..938c8e10a19a 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = { */ void save_stack_trace(struct stack_trace *trace) { - dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); + dump_trace(current, NULL, NULL, &save_stack_ops, trace); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace); -void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp) +void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs) { - dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace); + dump_trace(current, regs, NULL, &save_stack_ops, trace); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { - dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); + dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index fb5cc5e14cfa..25a28a245937 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -22,10 +22,6 @@ #include <asm/hpet.h> #include <asm/time.h> -#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) -int timer_ack; -#endif - #ifdef CONFIG_X86_64 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; #endif @@ -63,20 +59,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) /* Keep nmi watchdog up to date */ inc_irq_stat(irq0_irqs); - /* Optimized out for !IO_APIC and x86_64 */ - if (timer_ack) { - /* - * Subtle, when I/O APICs are used we have to ack timer IRQ - * manually to deassert NMI lines for the watchdog if run - * on an 82489DX-based system. - */ - raw_spin_lock(&i8259A_lock); - outb(0x0c, PIC_MASTER_OCW3); - /* Ack the IRQ; AEOI will end it automatically. 
*/ - inb(PIC_MASTER_POLL); - raw_spin_unlock(&i8259A_lock); - } - global_clock_event->event_handler(global_clock_event); /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index cb838ca42c96..bb6f04167361 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors); static int ignore_nmis; +int unknown_nmi_panic; + static inline void conditional_sti(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) @@ -300,6 +302,13 @@ gp_in_kernel: die("general protection fault", regs, error_code); } +static int __init setup_unknown_nmi_panic(char *str) +{ + unknown_nmi_panic = 1; + return 1; +} +__setup("unknown_nmi_panic", setup_unknown_nmi_panic); + static notrace __kprobes void mem_parity_error(unsigned char reason, struct pt_regs *regs) { @@ -371,7 +380,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) reason, smp_processor_id()); printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); - if (panic_on_unrecovered_nmi) + if (unknown_nmi_panic || panic_on_unrecovered_nmi) panic("NMI: Not continuing"); printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); @@ -397,20 +406,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) return; - -#ifndef CONFIG_LOCKUP_DETECTOR - /* - * Ok, so this is none of the documented NMI sources, - * so it must be the NMI watchdog. - */ - if (nmi_watchdog_tick(regs, reason)) - return; - if (!do_nmi_callback(regs, cpu)) -#endif /* !CONFIG_LOCKUP_DETECTOR */ - unknown_nmi_error(reason, regs); -#else - unknown_nmi_error(reason, regs); #endif + unknown_nmi_error(reason, regs); return; } @@ -446,14 +443,12 @@ do_nmi(struct pt_regs *regs, long error_code) void stop_nmi(void) { - acpi_nmi_disable(); ignore_nmis++; } void restart_nmi(void) { ignore_nmis--; - acpi_nmi_enable(); } /* May run on IST stack. 
*/ diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 9c253bd65e24..547128546cc3 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -394,7 +394,8 @@ static void __init setup_xstate_init(void) * Setup init_xstate_buf to represent the init state of * all the features managed by the xsave */ - init_xstate_buf = alloc_bootmem(xstate_size); + init_xstate_buf = alloc_bootmem_align(xstate_size, + __alignof__(struct xsave_struct)); init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; clts(); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1ca12298ffc7..b81a9b7c2ca4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3494,6 +3494,10 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) { switch (func) { + case 0x00000001: + /* Mask out xsave bit as long as it is not supported by SVM */ + entry->ecx &= ~(bit(X86_FEATURE_XSAVE)); + break; case 0x80000001: if (nested) entry->ecx |= (1 << 2); /* Set SVM bit */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ff21fdda0c53..81fcbe9515c5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4227,11 +4227,6 @@ static int vmx_get_lpage_level(void) return PT_PDPE_LEVEL; } -static inline u32 bit(int bitno) -{ - return 1 << (bitno & 31); -} - static void vmx_cpuid_update(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cdac9e592aa5..b989e1f1e5d3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -155,11 +155,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { u64 __read_mostly host_xcr0; -static inline u32 bit(int bitno) -{ - return 1 << (bitno & 31); -} - static void kvm_on_user_return(struct user_return_notifier *urn) { unsigned slot; @@ -4569,9 +4564,11 @@ static void kvm_timer_init(void) #ifdef CONFIG_CPU_FREQ struct cpufreq_policy policy; memset(&policy, 0, sizeof(policy)); - cpufreq_get_policy(&policy, get_cpu()); + cpu = get_cpu(); + cpufreq_get_policy(&policy, cpu); if (policy.cpuinfo.max_freq) max_tsc_khz = policy.cpuinfo.max_freq; + put_cpu(); #endif cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); @@ -5522,6 +5519,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; kvm_x86_ops->set_cr4(vcpu, sregs->cr4); + if (sregs->cr4 & X86_CR4_OSXSAVE) + update_cpuid(vcpu); if (!is_long_mode(vcpu) && is_pae(vcpu)) { load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3); mmu_reset_needed = 1; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 2cea414489f3..c600da830ce0 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -70,6 +70,11 @@ static inline int is_paging(struct kvm_vcpu *vcpu) return kvm_read_cr0_bits(vcpu, X86_CR0_PG); } +static inline u32 bit(int bitno) +{ + return 1 << (bitno & 31); +} + void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 73b1e1a1f489..4996cf5f73a0 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -531,7 +531,10 @@ static void lguest_write_cr3(unsigned long cr3) { lguest_data.pgdir = cr3; lazy_hcall1(LHCALL_NEW_PGTABLE, cr3); - cr3_changed = true; + + /* These two page tables are simple, linear, and used during boot */ + if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table)) + 
cr3_changed = true; } static unsigned long lguest_read_cr3(void) @@ -703,9 +706,9 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) * to forget all of them. Fortunately, this is very rare. * * ... except in early boot when the kernel sets up the initial pagetables, - * which makes booting astonishingly slow: 1.83 seconds! So we don't even tell - * the Host anything changed until we've done the first page table switch, - * which brings boot back to 0.25 seconds. + * which makes booting astonishingly slow: 48 seconds! So we don't even tell + * the Host anything changed until we've done the first real page table switch, + * which brings boot back to 4.3 seconds. */ static void lguest_set_pte(pte_t *ptep, pte_t pteval) { @@ -1002,7 +1005,7 @@ static void lguest_time_init(void) clockevents_register_device(&lguest_clockevent); /* Finally, we unblock the timer interrupt. */ - enable_lguest_irq(0); + clear_bit(0, lguest_data.blocked_interrupts); } /* @@ -1349,9 +1352,6 @@ __init void lguest_init(void) */ switch_to_new_gdt(0); - /* We actually boot with all memory mapped, but let's say 128MB. */ - max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; - /* * The Host<->Guest Switcher lives at the top of our address space, and * the Host told us how big it is when we made LGUEST_INIT hypercall: diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index 4f420c2f2d55..e7d5382ef263 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S @@ -4,6 +4,7 @@ #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/processor-flags.h> +#include <asm/pgtable.h> /*G:020 * Our story starts with the kernel booting into startup_32 in @@ -37,9 +38,113 @@ ENTRY(lguest_entry) /* Set up the initial stack so we can run C code. */ movl $(init_thread_union+THREAD_SIZE),%esp + call init_pagetables + /* Jumps are relative: we're running __PAGE_OFFSET too low. */ jmp lguest_init+__PAGE_OFFSET +/* + * Initialize page tables. This creates a PDE and a set of page + * tables, which are located immediately beyond __brk_base. The variable + * _brk_end is set up to point to the first "safe" location. + * Mappings are created both at virtual address 0 (identity mapping) + * and PAGE_OFFSET for up to _end. + * + * FIXME: This code is taken verbatim from arch/x86/kernel/head_32.S: they + * don't have a stack at this point, so we can't just use call and ret. + */ +init_pagetables: +#if PTRS_PER_PMD > 1 +#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) +#else +#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) +#endif +#define pa(X) ((X) - __PAGE_OFFSET) + +/* Enough space to fit pagetables for the low memory linear map */ +MAPPING_BEYOND_END = \ + PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT +#ifdef CONFIG_X86_PAE + + /* + * In PAE mode initial_page_table is statically defined to contain + * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3 + * entries). The identity mapping is handled by pointing two PGD entries + * to the first kernel PMD. + * + * Note the upper half of each PMD or PTE are always zero at this stage. 
+ */ + +#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ + + xorl %ebx,%ebx /* %ebx is kept at zero */ + + movl $pa(__brk_base), %edi + movl $pa(initial_pg_pmd), %edx + movl $PTE_IDENT_ATTR, %eax +10: + leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ + movl %ecx,(%edx) /* Store PMD entry */ + /* Upper half already zero */ + addl $8,%edx + movl $512,%ecx +11: + stosl + xchgl %eax,%ebx + stosl + xchgl %eax,%ebx + addl $0x1000,%eax + loop 11b + + /* + * End condition: we must map up to the end + MAPPING_BEYOND_END. + */ + movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp + cmpl %ebp,%eax + jb 10b +1: + addl $__PAGE_OFFSET, %edi + movl %edi, pa(_brk_end) + shrl $12, %eax + movl %eax, pa(max_pfn_mapped) + + /* Do early initialization of the fixmap area */ + movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax + movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8) +#else /* Not PAE */ + +page_pde_offset = (__PAGE_OFFSET >> 20); + + movl $pa(__brk_base), %edi + movl $pa(initial_page_table), %edx + movl $PTE_IDENT_ATTR, %eax +10: + leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ + movl %ecx,(%edx) /* Store identity PDE entry */ + movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ + addl $4,%edx + movl $1024, %ecx +11: + stosl + addl $0x1000,%eax + loop 11b + /* + * End condition: we must map up to the end + MAPPING_BEYOND_END. + */ + movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp + cmpl %ebp,%eax + jb 10b + addl $__PAGE_OFFSET, %edi + movl %edi, pa(_brk_end) + shrl $12, %eax + movl %eax, pa(max_pfn_mapped) + + /* Do early initialization of the fixmap area */ + movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax + movl %eax,pa(initial_page_table+0xffc) +#endif + ret + /*G:055 * We create a macro which puts the assembler code between lgstart_ and lgend_ * markers. 
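In the PAE branch above, KPMDS is derived from __PAGE_OFFSET alone: each PGD entry covers 1 GiB, so the kernel needs one PMD per 1 GiB slot above PAGE_OFFSET. A sketch of that computation under an assumed VMSPLIT_3G split; the final `& 3` keeps the result correct regardless of the width of unsigned long:

#include <stdio.h>

int main(void)
{
	unsigned long page_offset = 0xC0000000UL;	/* assumed VMSPLIT_3G */
	unsigned long kpmds = ((0UL - page_offset) >> 30) & 3;

	/* 1 GiB of kernel address space above PAGE_OFFSET -> 1 kernel PMD */
	printf("KPMDS = %lu\n", kpmds);
	return 0;
}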
These templates are put in the .text section: they can't be diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c index af3b6c8a436f..704a37cedddb 100644 --- a/arch/x86/mm/kmemcheck/error.c +++ b/arch/x86/mm/kmemcheck/error.c @@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state, e->trace.entries = e->trace_entries; e->trace.max_entries = ARRAY_SIZE(e->trace_entries); e->trace.skip = 0; - save_stack_trace_bp(&e->trace, regs->bp); + save_stack_trace_regs(&e->trace, regs); /* Round address down to nearest 16 bytes */ shadow_copy = kmemcheck_shadow_lookup(address diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 2d49d4e19a36..72cbec14d783 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -126,7 +126,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) if (!user_mode_vm(regs)) { unsigned long stack = kernel_stack_pointer(regs); if (depth) - dump_trace(NULL, regs, (unsigned long *)stack, 0, + dump_trace(NULL, regs, (unsigned long *)stack, &backtrace_ops, &depth); return; } diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c index e3ecb71b5790..0636dd93cef8 100644 --- a/arch/x86/oprofile/nmi_timer_int.c +++ b/arch/x86/oprofile/nmi_timer_int.c @@ -58,9 +58,6 @@ static void timer_stop(void) int __init op_nmi_timer_init(struct oprofile_operations *ops) { - if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0)) - return -ENODEV; - ops->start = timer_start; ops->stop = timer_stop; ops->cpu_type = "timer"; diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 182558dd5515..9fadec074142 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -11,7 +11,7 @@ #include <linux/oprofile.h> #include <linux/smp.h> #include <linux/ptrace.h> -#include <linux/nmi.h> +#include <asm/nmi.h> #include <asm/msr.h> #include <asm/fixmap.h> #include <asm/apic.h> diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index c4bb261c106e..b1805b78842f 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -65,21 +65,13 @@ pcibios_align_resource(void *data, const struct resource *res, resource_size_t size, resource_size_t align) { struct pci_dev *dev = data; - resource_size_t start = round_down(res->end - size + 1, align); + resource_size_t start = res->start; if (res->flags & IORESOURCE_IO) { - - /* - * If we're avoiding ISA aliases, the largest contiguous I/O - * port space is 256 bytes. Clearing bits 9 and 10 preserves - * all 256-byte and smaller alignments, so the result will - * still be correctly aligned. 
- */
-		if (!skip_isa_ioresource_align(dev))
-			start &= ~0x300;
-	} else if (res->flags & IORESOURCE_MEM) {
-		if (start < BIOS_END)
-			start = res->end;	/* fail; no space */
+		if (skip_isa_ioresource_align(dev))
+			return start;
+		if (start & 0x300)
+			start = (start + 0x3ff) & ~0x3ff;
 	}
 	return start;
 }
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 4a2afa1bac51..b6552b189bcd 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -25,7 +25,7 @@ targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
 
 export CPPFLAGS_vdso.lds += -P -C
 
-VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -Wl,-soname=linux-vdso.so.1 \
+VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
 			-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
 
 $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
@@ -69,7 +69,7 @@ vdso32.so-$(VDSO32-y) += sysenter
 vdso32-images		= $(vdso32.so-y:%=vdso32-%.so)
 
 CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
-VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1
+VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-soname=linux-gate.so.1
 
 # This makes sure the $(obj) subdirectory exists even though vdso32/
 # is not a kbuild sub-make subdirectory.
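The new pcibios_align_resource() I/O branch above rounds conflicting starts up to the next 1 KiB boundary: legacy ISA devices decode only the low 10 address bits, so keeping bits 8-9 clear (any multiple of 0x400 does) avoids their aliases while preserving all smaller alignments. A sketch with hypothetical port numbers:

#include <stdio.h>

static unsigned long align_io(unsigned long start)
{
	/* Mirror of the diff's rule: round up past the ISA alias window. */
	if (start & 0x300)
		start = (start + 0x3ff) & ~0x3ffUL;
	return start;
}

int main(void)
{
	printf("0x%lx -> 0x%lx\n", 0x1110UL, align_io(0x1110UL)); /* 0x1400 */
	printf("0x%lx -> 0x%lx\n", 0x1400UL, align_io(0x1400UL)); /* 0x1400 */
	return 0;
}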