Diffstat (limited to 'arch')
205 files changed, 5926 insertions(+), 1795 deletions(-)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 07ba77c19f6c..c8d94dcd8ef7 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -157,7 +157,7 @@ config ARCH_RPC config ARCH_SA1100 bool "SA1100-based" select ISA - select DISCONTIGMEM + select ARCH_DISCONTIGMEM_ENABLE config ARCH_S3C2410 bool "Samsung S3C2410" @@ -346,6 +346,21 @@ config PREEMPT Say Y here if you are building a kernel for a desktop, embedded or real-time system. Say N if you are unsure. +config NO_IDLE_HZ + bool "Dynamic tick timer" + help + Select this option if you want to disable continuous timer ticks + and have them programmed to occur as required. This option saves + power as the system can remain in idle state for longer. + + By default dynamic tick is disabled during the boot, and can be + manually enabled with: + + echo 1 > /sys/devices/system/timer/timer0/dyn_tick + + Alternatively, if you want dynamic tick automatically enabled + during boot, pass "dyntick=enable" via the kernel command string. + config ARCH_DISCONTIGMEM_ENABLE bool default (ARCH_LH7A40X && !LH7A40X_CONTIGMEM) diff --git a/arch/arm/configs/enp2611_defconfig b/arch/arm/configs/enp2611_defconfig index 06fae4b62774..b8c51ee7f1bb 100644 --- a/arch/arm/configs/enp2611_defconfig +++ b/arch/arm/configs/enp2611_defconfig @@ -1,14 +1,13 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc1-bk2 -# Sun Mar 27 22:08:24 2005 +# Linux kernel version: 2.6.12-git6 +# Sat Jun 25 00:57:29 2005 # CONFIG_ARM=y CONFIG_MMU=y CONFIG_UID16=y CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_IOMAP=y # # Code maturity level options @@ -16,6 +15,7 @@ CONFIG_GENERIC_IOMAP=y CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -35,6 +35,8 @@ CONFIG_EMBEDDED=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -82,6 +84,7 @@ CONFIG_ARCH_IXP2000=y # CONFIG_ARCH_VERSATILE is not set # CONFIG_ARCH_IMX is not set # CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_AAEC2000 is not set CONFIG_ARCH_SUPPORTS_BIG_ENDIAN=y # @@ -96,6 +99,7 @@ CONFIG_ARCH_ENP2611=y # CONFIG_ARCH_IXDP2800 is not set # CONFIG_ARCH_IXDP2401 is not set # CONFIG_ARCH_IXDP2801 is not set +# CONFIG_IXP2000_SUPPORT_BROKEN_PCI_IO is not set # # Processor Type @@ -106,7 +110,6 @@ CONFIG_CPU_32v5=y CONFIG_CPU_ABRT_EV5T=y CONFIG_CPU_CACHE_VIVT=y CONFIG_CPU_TLB_V4WBI=y -CONFIG_CPU_MINICACHE=y # # Processor Features @@ -118,9 +121,11 @@ CONFIG_XSCALE_PMU=y # # Bus support # +CONFIG_ISA_DMA_API=y CONFIG_PCI=y CONFIG_PCI_LEGACY_PROC=y CONFIG_PCI_NAMES=y +# CONFIG_PCI_DEBUG is not set # # PCCARD (PCMCIA/CardBus) support @@ -130,7 +135,15 @@ CONFIG_PCI_NAMES=y # # Kernel Features # +# CONFIG_SMP is not set # CONFIG_PREEMPT is not set +# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_ALIGNMENT_TRAP=y # @@ -269,7 +282,6 @@ CONFIG_MTD_IXP2000=y # # Block devices # -# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -308,6 +320,7 @@ CONFIG_IOSCHED_CFQ=y # # Fusion MPT device support # +# CONFIG_FUSION is not set # # IEEE 1394 (FireWire) support @@ -329,10 +342,11 @@ CONFIG_NET=y # CONFIG_PACKET=y 
CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set # CONFIG_IP_MULTICAST is not set # CONFIG_IP_ADVANCED_ROUTER is not set CONFIG_IP_PNP=y @@ -349,6 +363,17 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_TUNNEL is not set # CONFIG_IP_TCPDIAG is not set # CONFIG_IP_TCPDIAG_IPV6 is not set + +# +# TCP congestion control +# +CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_WESTWOOD=m +CONFIG_TCP_CONG_HTCP=m +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set # CONFIG_IPV6 is not set # CONFIG_NETFILTER is not set @@ -404,6 +429,7 @@ CONFIG_MII=y # CONFIG_SUNGEM is not set # CONFIG_NET_VENDOR_3COM is not set # CONFIG_SMC91X is not set +# CONFIG_DM9000 is not set # # Tulip family network device support @@ -440,9 +466,11 @@ CONFIG_EEPRO100=y # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set # CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set # # Ethernet (10000 Mbit) @@ -464,6 +492,7 @@ CONFIG_EEPRO100=y # Wan interfaces # CONFIG_WAN=y +# CONFIG_DSCC4 is not set # CONFIG_LANMEDIA is not set # CONFIG_SYNCLINK_SYNCPPP is not set CONFIG_HDLC=y @@ -526,7 +555,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # # CONFIG_SERIO is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices @@ -547,6 +575,7 @@ CONFIG_SERIAL_8250_NR_UARTS=2 # CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -613,17 +642,18 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_I2C_AMD8111 is not set # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_ISA is not set # CONFIG_I2C_IXP2000 is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_PROSAVAGE is not set # CONFIG_I2C_SAVAGE4 is not set # CONFIG_SCx200_ACB is not set # CONFIG_I2C_SIS5595 is not set # CONFIG_I2C_SIS630 is not set # CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_STUB is not set # CONFIG_I2C_VIA is not set # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set @@ -637,7 +667,9 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_ADM1025 is not set # CONFIG_SENSORS_ADM1026 is not set # CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set # CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_ATXP1 is not set # CONFIG_SENSORS_DS1621 is not set # CONFIG_SENSORS_FSCHER is not set # CONFIG_SENSORS_FSCPOS is not set @@ -653,6 +685,7 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_LM85 is not set # CONFIG_SENSORS_LM87 is not set # CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_SMSC47B397 is not set @@ -662,14 +695,19 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_W83781D is not set # CONFIG_SENSORS_W83L785TS is not set # CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set # # Other I2C Chip support # +# CONFIG_SENSORS_DS1337 is not set +# CONFIG_SENSORS_DS1374 is not set CONFIG_SENSORS_EEPROM=y # CONFIG_SENSORS_PCF8574 is not set +# CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_RTC8564 is not set +# CONFIG_SENSORS_MAX6875 is not set # CONFIG_I2C_DEBUG_CORE is not set # 
CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -723,6 +761,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y # CONFIG_EXT2_FS_SECURITY is not set +# CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y @@ -763,7 +802,6 @@ CONFIG_DNOTIFY=y # CONFIG_PROC_FS=y CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set # CONFIG_DEVPTS_FS_XATTR is not set CONFIG_TMPFS=y # CONFIG_TMPFS_XATTR is not set @@ -801,12 +839,14 @@ CONFIG_JFFS2_RTIME=y # CONFIG_NFS_FS=y CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set # CONFIG_NFS_V4 is not set # CONFIG_NFS_DIRECTIO is not set # CONFIG_NFSD is not set CONFIG_ROOT_NFS=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set @@ -891,3 +931,4 @@ CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y +# CONFIG_TEXTSEARCH is not set diff --git a/arch/arm/configs/ixdp2400_defconfig b/arch/arm/configs/ixdp2400_defconfig index 810a450a55d2..3cfbe2ec29ca 100644 --- a/arch/arm/configs/ixdp2400_defconfig +++ b/arch/arm/configs/ixdp2400_defconfig @@ -1,14 +1,13 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc1-bk2 -# Sun Mar 27 21:13:38 2005 +# Linux kernel version: 2.6.12-git6 +# Sat Jun 25 00:58:38 2005 # CONFIG_ARM=y CONFIG_MMU=y CONFIG_UID16=y CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_IOMAP=y # # Code maturity level options @@ -16,6 +15,7 @@ CONFIG_GENERIC_IOMAP=y CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -35,6 +35,8 @@ CONFIG_EMBEDDED=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -82,6 +84,7 @@ CONFIG_ARCH_IXP2000=y # CONFIG_ARCH_VERSATILE is not set # CONFIG_ARCH_IMX is not set # CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_AAEC2000 is not set CONFIG_ARCH_SUPPORTS_BIG_ENDIAN=y # @@ -97,6 +100,7 @@ CONFIG_ARCH_IXDP2400=y CONFIG_ARCH_IXDP2X00=y # CONFIG_ARCH_IXDP2401 is not set # CONFIG_ARCH_IXDP2801 is not set +# CONFIG_IXP2000_SUPPORT_BROKEN_PCI_IO is not set # # Processor Type @@ -107,7 +111,6 @@ CONFIG_CPU_32v5=y CONFIG_CPU_ABRT_EV5T=y CONFIG_CPU_CACHE_VIVT=y CONFIG_CPU_TLB_V4WBI=y -CONFIG_CPU_MINICACHE=y # # Processor Features @@ -119,9 +122,11 @@ CONFIG_XSCALE_PMU=y # # Bus support # +CONFIG_ISA_DMA_API=y CONFIG_PCI=y CONFIG_PCI_LEGACY_PROC=y CONFIG_PCI_NAMES=y +# CONFIG_PCI_DEBUG is not set # # PCCARD (PCMCIA/CardBus) support @@ -131,7 +136,15 @@ CONFIG_PCI_NAMES=y # # Kernel Features # +# CONFIG_SMP is not set # CONFIG_PREEMPT is not set +# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_ALIGNMENT_TRAP=y # @@ -270,7 +283,6 @@ CONFIG_MTD_IXP2000=y # # Block devices # -# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -309,6 +321,7 @@ CONFIG_IOSCHED_CFQ=y # # Fusion MPT device support # +# CONFIG_FUSION is not set # # IEEE 1394 (FireWire) support @@ -330,10 +343,11 @@ CONFIG_NET=y # CONFIG_PACKET=y CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y 
+CONFIG_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set # CONFIG_IP_MULTICAST is not set # CONFIG_IP_ADVANCED_ROUTER is not set CONFIG_IP_PNP=y @@ -350,6 +364,17 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_TUNNEL is not set # CONFIG_IP_TCPDIAG is not set # CONFIG_IP_TCPDIAG_IPV6 is not set + +# +# TCP congestion control +# +CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_WESTWOOD=m +CONFIG_TCP_CONG_HTCP=m +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set # CONFIG_IPV6 is not set # CONFIG_NETFILTER is not set @@ -405,6 +430,7 @@ CONFIG_MII=y # CONFIG_SUNGEM is not set # CONFIG_NET_VENDOR_3COM is not set # CONFIG_SMC91X is not set +# CONFIG_DM9000 is not set # # Tulip family network device support @@ -441,9 +467,11 @@ CONFIG_EEPRO100=y # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set # CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set # # Ethernet (10000 Mbit) @@ -465,6 +493,7 @@ CONFIG_EEPRO100=y # Wan interfaces # CONFIG_WAN=y +# CONFIG_DSCC4 is not set # CONFIG_LANMEDIA is not set # CONFIG_SYNCLINK_SYNCPPP is not set CONFIG_HDLC=y @@ -527,7 +556,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # # CONFIG_SERIO is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices @@ -548,6 +576,7 @@ CONFIG_SERIAL_8250_NR_UARTS=2 # CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -614,17 +643,18 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_I2C_AMD8111 is not set # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_ISA is not set # CONFIG_I2C_IXP2000 is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_PROSAVAGE is not set # CONFIG_I2C_SAVAGE4 is not set # CONFIG_SCx200_ACB is not set # CONFIG_I2C_SIS5595 is not set # CONFIG_I2C_SIS630 is not set # CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_STUB is not set # CONFIG_I2C_VIA is not set # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set @@ -638,7 +668,9 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_ADM1025 is not set # CONFIG_SENSORS_ADM1026 is not set # CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set # CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_ATXP1 is not set # CONFIG_SENSORS_DS1621 is not set # CONFIG_SENSORS_FSCHER is not set # CONFIG_SENSORS_FSCPOS is not set @@ -654,6 +686,7 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_LM85 is not set # CONFIG_SENSORS_LM87 is not set # CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_SMSC47B397 is not set @@ -663,14 +696,19 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_W83781D is not set # CONFIG_SENSORS_W83L785TS is not set # CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set # # Other I2C Chip support # +# CONFIG_SENSORS_DS1337 is not set +# CONFIG_SENSORS_DS1374 is not set CONFIG_SENSORS_EEPROM=y # CONFIG_SENSORS_PCF8574 is not set +# CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_RTC8564 is not set +# CONFIG_SENSORS_MAX6875 is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -724,6 +762,7 @@ CONFIG_EXT2_FS=y 
CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y # CONFIG_EXT2_FS_SECURITY is not set +# CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y @@ -764,7 +803,6 @@ CONFIG_DNOTIFY=y # CONFIG_PROC_FS=y CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set # CONFIG_DEVPTS_FS_XATTR is not set CONFIG_TMPFS=y # CONFIG_TMPFS_XATTR is not set @@ -802,12 +840,14 @@ CONFIG_JFFS2_RTIME=y # CONFIG_NFS_FS=y CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set # CONFIG_NFS_V4 is not set # CONFIG_NFS_DIRECTIO is not set # CONFIG_NFSD is not set CONFIG_ROOT_NFS=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set @@ -892,3 +932,4 @@ CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y +# CONFIG_TEXTSEARCH is not set diff --git a/arch/arm/configs/ixdp2401_defconfig b/arch/arm/configs/ixdp2401_defconfig index 72e1b940e975..5c87e8e6969b 100644 --- a/arch/arm/configs/ixdp2401_defconfig +++ b/arch/arm/configs/ixdp2401_defconfig @@ -1,14 +1,13 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc1-bk2 -# Sun Mar 27 21:53:55 2005 +# Linux kernel version: 2.6.12-git6 +# Sat Jun 25 00:59:35 2005 # CONFIG_ARM=y CONFIG_MMU=y CONFIG_UID16=y CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_IOMAP=y # # Code maturity level options @@ -16,6 +15,7 @@ CONFIG_GENERIC_IOMAP=y CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -35,6 +35,8 @@ CONFIG_EMBEDDED=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -82,6 +84,7 @@ CONFIG_ARCH_IXP2000=y # CONFIG_ARCH_VERSATILE is not set # CONFIG_ARCH_IMX is not set # CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_AAEC2000 is not set CONFIG_ARCH_SUPPORTS_BIG_ENDIAN=y # @@ -97,6 +100,7 @@ CONFIG_ARCH_SUPPORTS_BIG_ENDIAN=y CONFIG_ARCH_IXDP2401=y # CONFIG_ARCH_IXDP2801 is not set CONFIG_ARCH_IXDP2X01=y +# CONFIG_IXP2000_SUPPORT_BROKEN_PCI_IO is not set # # Processor Type @@ -107,7 +111,6 @@ CONFIG_CPU_32v5=y CONFIG_CPU_ABRT_EV5T=y CONFIG_CPU_CACHE_VIVT=y CONFIG_CPU_TLB_V4WBI=y -CONFIG_CPU_MINICACHE=y # # Processor Features @@ -119,9 +122,11 @@ CONFIG_XSCALE_PMU=y # # Bus support # +CONFIG_ISA_DMA_API=y CONFIG_PCI=y CONFIG_PCI_LEGACY_PROC=y CONFIG_PCI_NAMES=y +# CONFIG_PCI_DEBUG is not set # # PCCARD (PCMCIA/CardBus) support @@ -131,7 +136,15 @@ CONFIG_PCI_NAMES=y # # Kernel Features # +# CONFIG_SMP is not set # CONFIG_PREEMPT is not set +# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_ALIGNMENT_TRAP=y # @@ -270,7 +283,6 @@ CONFIG_MTD_IXP2000=y # # Block devices # -# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -309,6 +321,7 @@ CONFIG_IOSCHED_CFQ=y # # Fusion MPT device support # +# CONFIG_FUSION is not set # # IEEE 1394 (FireWire) support @@ -330,10 +343,11 @@ CONFIG_NET=y # CONFIG_PACKET=y CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set # CONFIG_IP_MULTICAST is not set # CONFIG_IP_ADVANCED_ROUTER 
is not set CONFIG_IP_PNP=y @@ -350,6 +364,17 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_TUNNEL is not set CONFIG_IP_TCPDIAG=y # CONFIG_IP_TCPDIAG_IPV6 is not set + +# +# TCP congestion control +# +CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_WESTWOOD=m +CONFIG_TCP_CONG_HTCP=m +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set # CONFIG_IPV6 is not set # CONFIG_NETFILTER is not set @@ -405,6 +430,7 @@ CONFIG_MII=y # CONFIG_SUNGEM is not set # CONFIG_NET_VENDOR_3COM is not set # CONFIG_SMC91X is not set +# CONFIG_DM9000 is not set # # Tulip family network device support @@ -442,9 +468,11 @@ CONFIG_EEPRO100=y # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set # CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set # # Ethernet (10000 Mbit) @@ -466,6 +494,7 @@ CONFIG_EEPRO100=y # Wan interfaces # CONFIG_WAN=y +# CONFIG_DSCC4 is not set # CONFIG_LANMEDIA is not set # CONFIG_SYNCLINK_SYNCPPP is not set CONFIG_HDLC=y @@ -528,7 +557,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # # CONFIG_SERIO is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices @@ -549,6 +577,7 @@ CONFIG_SERIAL_8250_NR_UARTS=2 # CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -615,17 +644,18 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_I2C_AMD8111 is not set # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_ISA is not set # CONFIG_I2C_IXP2000 is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_PROSAVAGE is not set # CONFIG_I2C_SAVAGE4 is not set # CONFIG_SCx200_ACB is not set # CONFIG_I2C_SIS5595 is not set # CONFIG_I2C_SIS630 is not set # CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_STUB is not set # CONFIG_I2C_VIA is not set # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set @@ -639,7 +669,9 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_ADM1025 is not set # CONFIG_SENSORS_ADM1026 is not set # CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set # CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_ATXP1 is not set # CONFIG_SENSORS_DS1621 is not set # CONFIG_SENSORS_FSCHER is not set # CONFIG_SENSORS_FSCPOS is not set @@ -655,6 +687,7 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_LM85 is not set # CONFIG_SENSORS_LM87 is not set # CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_SMSC47B397 is not set @@ -664,14 +697,19 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_W83781D is not set # CONFIG_SENSORS_W83L785TS is not set # CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set # # Other I2C Chip support # +# CONFIG_SENSORS_DS1337 is not set +# CONFIG_SENSORS_DS1374 is not set CONFIG_SENSORS_EEPROM=y # CONFIG_SENSORS_PCF8574 is not set +# CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_RTC8564 is not set +# CONFIG_SENSORS_MAX6875 is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -725,6 +763,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y # CONFIG_EXT2_FS_SECURITY is not set +# CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y 
CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y @@ -765,7 +804,6 @@ CONFIG_DNOTIFY=y # CONFIG_PROC_FS=y CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set # CONFIG_DEVPTS_FS_XATTR is not set CONFIG_TMPFS=y # CONFIG_TMPFS_XATTR is not set @@ -803,12 +841,14 @@ CONFIG_JFFS2_RTIME=y # CONFIG_NFS_FS=y CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set # CONFIG_NFS_V4 is not set # CONFIG_NFS_DIRECTIO is not set # CONFIG_NFSD is not set CONFIG_ROOT_NFS=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set @@ -893,3 +933,4 @@ CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y +# CONFIG_TEXTSEARCH is not set diff --git a/arch/arm/configs/ixdp2800_defconfig b/arch/arm/configs/ixdp2800_defconfig index 1592e45f0278..3cb561a551cb 100644 --- a/arch/arm/configs/ixdp2800_defconfig +++ b/arch/arm/configs/ixdp2800_defconfig @@ -1,14 +1,13 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc1-bk2 -# Sun Mar 27 22:15:23 2005 +# Linux kernel version: 2.6.12-git6 +# Sat Jun 25 01:00:27 2005 # CONFIG_ARM=y CONFIG_MMU=y CONFIG_UID16=y CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_IOMAP=y # # Code maturity level options @@ -16,6 +15,7 @@ CONFIG_GENERIC_IOMAP=y CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -35,6 +35,8 @@ CONFIG_EMBEDDED=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -82,6 +84,7 @@ CONFIG_ARCH_IXP2000=y # CONFIG_ARCH_VERSATILE is not set # CONFIG_ARCH_IMX is not set # CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_AAEC2000 is not set CONFIG_ARCH_SUPPORTS_BIG_ENDIAN=y # @@ -97,6 +100,7 @@ CONFIG_ARCH_IXDP2800=y CONFIG_ARCH_IXDP2X00=y # CONFIG_ARCH_IXDP2401 is not set # CONFIG_ARCH_IXDP2801 is not set +# CONFIG_IXP2000_SUPPORT_BROKEN_PCI_IO is not set # # Processor Type @@ -107,7 +111,6 @@ CONFIG_CPU_32v5=y CONFIG_CPU_ABRT_EV5T=y CONFIG_CPU_CACHE_VIVT=y CONFIG_CPU_TLB_V4WBI=y -CONFIG_CPU_MINICACHE=y # # Processor Features @@ -119,9 +122,11 @@ CONFIG_XSCALE_PMU=y # # Bus support # +CONFIG_ISA_DMA_API=y CONFIG_PCI=y CONFIG_PCI_LEGACY_PROC=y CONFIG_PCI_NAMES=y +# CONFIG_PCI_DEBUG is not set # # PCCARD (PCMCIA/CardBus) support @@ -131,7 +136,15 @@ CONFIG_PCI_NAMES=y # # Kernel Features # +# CONFIG_SMP is not set # CONFIG_PREEMPT is not set +# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_ALIGNMENT_TRAP=y # @@ -270,7 +283,6 @@ CONFIG_MTD_IXP2000=y # # Block devices # -# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -309,6 +321,7 @@ CONFIG_IOSCHED_CFQ=y # # Fusion MPT device support # +# CONFIG_FUSION is not set # # IEEE 1394 (FireWire) support @@ -330,10 +343,11 @@ CONFIG_NET=y # CONFIG_PACKET=y CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set # CONFIG_IP_MULTICAST is not set # CONFIG_IP_ADVANCED_ROUTER is not set CONFIG_IP_PNP=y @@ -350,6 +364,17 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_TUNNEL is not set # CONFIG_IP_TCPDIAG is not set # 
CONFIG_IP_TCPDIAG_IPV6 is not set + +# +# TCP congestion control +# +CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_WESTWOOD=m +CONFIG_TCP_CONG_HTCP=m +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set # CONFIG_IPV6 is not set # CONFIG_NETFILTER is not set @@ -405,6 +430,7 @@ CONFIG_MII=y # CONFIG_SUNGEM is not set # CONFIG_NET_VENDOR_3COM is not set # CONFIG_SMC91X is not set +# CONFIG_DM9000 is not set # # Tulip family network device support @@ -441,9 +467,11 @@ CONFIG_EEPRO100=y # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set # CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set # # Ethernet (10000 Mbit) @@ -465,6 +493,7 @@ CONFIG_EEPRO100=y # Wan interfaces # CONFIG_WAN=y +# CONFIG_DSCC4 is not set # CONFIG_LANMEDIA is not set # CONFIG_SYNCLINK_SYNCPPP is not set CONFIG_HDLC=y @@ -527,7 +556,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # # CONFIG_SERIO is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices @@ -548,6 +576,7 @@ CONFIG_SERIAL_8250_NR_UARTS=2 # CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -614,17 +643,18 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_I2C_AMD8111 is not set # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_ISA is not set # CONFIG_I2C_IXP2000 is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_PROSAVAGE is not set # CONFIG_I2C_SAVAGE4 is not set # CONFIG_SCx200_ACB is not set # CONFIG_I2C_SIS5595 is not set # CONFIG_I2C_SIS630 is not set # CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_STUB is not set # CONFIG_I2C_VIA is not set # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set @@ -638,7 +668,9 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_ADM1025 is not set # CONFIG_SENSORS_ADM1026 is not set # CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set # CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_ATXP1 is not set # CONFIG_SENSORS_DS1621 is not set # CONFIG_SENSORS_FSCHER is not set # CONFIG_SENSORS_FSCPOS is not set @@ -654,6 +686,7 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_LM85 is not set # CONFIG_SENSORS_LM87 is not set # CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_SMSC47B397 is not set @@ -663,14 +696,19 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_W83781D is not set # CONFIG_SENSORS_W83L785TS is not set # CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set # # Other I2C Chip support # +# CONFIG_SENSORS_DS1337 is not set +# CONFIG_SENSORS_DS1374 is not set CONFIG_SENSORS_EEPROM=y # CONFIG_SENSORS_PCF8574 is not set +# CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_RTC8564 is not set +# CONFIG_SENSORS_MAX6875 is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -724,6 +762,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y # CONFIG_EXT2_FS_SECURITY is not set +# CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y @@ -764,7 +803,6 @@ CONFIG_DNOTIFY=y # CONFIG_PROC_FS=y CONFIG_SYSFS=y -# 
CONFIG_DEVFS_FS is not set # CONFIG_DEVPTS_FS_XATTR is not set CONFIG_TMPFS=y # CONFIG_TMPFS_XATTR is not set @@ -802,12 +840,14 @@ CONFIG_JFFS2_RTIME=y # CONFIG_NFS_FS=y CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set # CONFIG_NFS_V4 is not set # CONFIG_NFS_DIRECTIO is not set # CONFIG_NFSD is not set CONFIG_ROOT_NFS=y CONFIG_LOCKD=y CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set @@ -892,3 +932,4 @@ CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y +# CONFIG_TEXTSEARCH is not set diff --git a/arch/arm/configs/ixdp2801_defconfig b/arch/arm/configs/ixdp2801_defconfig index f1afe3d09ec6..b1e162f29cb9 100644 --- a/arch/arm/configs/ixdp2801_defconfig +++ b/arch/arm/configs/ixdp2801_defconfig @@ -1,14 +1,13 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc1-bk2 -# Sun Mar 27 22:39:19 2005 +# Linux kernel version: 2.6.12-git6 +# Sat Jun 25 01:01:18 2005 # CONFIG_ARM=y CONFIG_MMU=y CONFIG_UID16=y CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_IOMAP=y # # Code maturity level options @@ -16,6 +15,7 @@ CONFIG_GENERIC_IOMAP=y CONFIG_EXPERIMENTAL=y CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup @@ -35,6 +35,8 @@ CONFIG_EMBEDDED=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -82,6 +84,7 @@ CONFIG_ARCH_IXP2000=y # CONFIG_ARCH_VERSATILE is not set # CONFIG_ARCH_IMX is not set # CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_AAEC2000 is not set CONFIG_ARCH_SUPPORTS_BIG_ENDIAN=y # @@ -97,6 +100,7 @@ CONFIG_ARCH_SUPPORTS_BIG_ENDIAN=y # CONFIG_ARCH_IXDP2401 is not set CONFIG_ARCH_IXDP2801=y CONFIG_ARCH_IXDP2X01=y +# CONFIG_IXP2000_SUPPORT_BROKEN_PCI_IO is not set # # Processor Type @@ -107,7 +111,6 @@ CONFIG_CPU_32v5=y CONFIG_CPU_ABRT_EV5T=y CONFIG_CPU_CACHE_VIVT=y CONFIG_CPU_TLB_V4WBI=y -CONFIG_CPU_MINICACHE=y # # Processor Features @@ -119,9 +122,11 @@ CONFIG_XSCALE_PMU=y # # Bus support # +CONFIG_ISA_DMA_API=y CONFIG_PCI=y CONFIG_PCI_LEGACY_PROC=y CONFIG_PCI_NAMES=y +# CONFIG_PCI_DEBUG is not set # # PCCARD (PCMCIA/CardBus) support @@ -131,7 +136,15 @@ CONFIG_PCI_NAMES=y # # Kernel Features # +# CONFIG_SMP is not set # CONFIG_PREEMPT is not set +# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_ALIGNMENT_TRAP=y # @@ -270,7 +283,6 @@ CONFIG_MTD_IXP2000=y # # Block devices # -# CONFIG_BLK_DEV_FD is not set # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -309,6 +321,7 @@ CONFIG_IOSCHED_CFQ=y # # Fusion MPT device support # +# CONFIG_FUSION is not set # # IEEE 1394 (FireWire) support @@ -330,10 +343,11 @@ CONFIG_NET=y # CONFIG_PACKET=y CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set CONFIG_UNIX=y # CONFIG_NET_KEY is not set CONFIG_INET=y +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set # CONFIG_IP_MULTICAST is not set # CONFIG_IP_ADVANCED_ROUTER is not set CONFIG_IP_PNP=y @@ -350,6 +364,17 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_TUNNEL is not set # CONFIG_IP_TCPDIAG is not set # CONFIG_IP_TCPDIAG_IPV6 is not set + +# +# TCP congestion control +# +CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_WESTWOOD=m 
+CONFIG_TCP_CONG_HTCP=m +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set # CONFIG_IPV6 is not set # CONFIG_NETFILTER is not set @@ -405,6 +430,7 @@ CONFIG_MII=y # CONFIG_SUNGEM is not set # CONFIG_NET_VENDOR_3COM is not set # CONFIG_SMC91X is not set +# CONFIG_DM9000 is not set # # Tulip family network device support @@ -442,9 +468,11 @@ CONFIG_EEPRO100=y # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set +# CONFIG_SKGE is not set # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set # CONFIG_TIGON3 is not set +# CONFIG_BNX2 is not set # # Ethernet (10000 Mbit) @@ -466,6 +494,7 @@ CONFIG_EEPRO100=y # Wan interfaces # CONFIG_WAN=y +# CONFIG_DSCC4 is not set # CONFIG_LANMEDIA is not set # CONFIG_SYNCLINK_SYNCPPP is not set CONFIG_HDLC=y @@ -528,7 +557,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # # CONFIG_SERIO is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices @@ -549,6 +577,7 @@ CONFIG_SERIAL_8250_NR_UARTS=2 # CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 @@ -615,17 +644,18 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_I2C_AMD8111 is not set # CONFIG_I2C_I801 is not set # CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_ISA is not set # CONFIG_I2C_IXP2000 is not set # CONFIG_I2C_NFORCE2 is not set # CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_PROSAVAGE is not set # CONFIG_I2C_SAVAGE4 is not set # CONFIG_SCx200_ACB is not set # CONFIG_I2C_SIS5595 is not set # CONFIG_I2C_SIS630 is not set # CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_STUB is not set # CONFIG_I2C_VIA is not set # CONFIG_I2C_VIAPRO is not set # CONFIG_I2C_VOODOO3 is not set @@ -639,7 +669,9 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_ADM1025 is not set # CONFIG_SENSORS_ADM1026 is not set # CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set # CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_ATXP1 is not set # CONFIG_SENSORS_DS1621 is not set # CONFIG_SENSORS_FSCHER is not set # CONFIG_SENSORS_FSCPOS is not set @@ -655,6 +687,7 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_LM85 is not set # CONFIG_SENSORS_LM87 is not set # CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_SMSC47B397 is not set @@ -664,14 +697,19 @@ CONFIG_I2C_SENSOR=y # CONFIG_SENSORS_W83781D is not set # CONFIG_SENSORS_W83L785TS is not set # CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set # # Other I2C Chip support # +# CONFIG_SENSORS_DS1337 is not set +# CONFIG_SENSORS_DS1374 is not set CONFIG_SENSORS_EEPROM=y # CONFIG_SENSORS_PCF8574 is not set +# CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_RTC8564 is not set +# CONFIG_SENSORS_MAX6875 is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -725,6 +763,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y # CONFIG_EXT2_FS_SECURITY is not set +# CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y @@ -765,7 +804,6 @@ CONFIG_DNOTIFY=y # CONFIG_PROC_FS=y CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set # CONFIG_DEVPTS_FS_XATTR is not set CONFIG_TMPFS=y # CONFIG_TMPFS_XATTR is not set @@ -803,12 
+841,14 @@ CONFIG_JFFS2_RTIME=y
 #
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
 # CONFIG_NFS_V4 is not set
 # CONFIG_NFS_DIRECTIO is not set
 # CONFIG_NFSD is not set
 CONFIG_ROOT_NFS=y
 CONFIG_LOCKD=y
 CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=y
 # CONFIG_RPCSEC_GSS_KRB5 is not set
 # CONFIG_RPCSEC_GSS_SPKM3 is not set
@@ -893,3 +933,4 @@ CONFIG_CRC32=y
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=y
+# CONFIG_TEXTSEARCH is not set
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index ff187f4308f0..395137a8fad2 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -4,6 +4,10 @@
  * Copyright (C) 1992 Linus Torvalds
  * Modifications for ARM processor Copyright (C) 1995-2000 Russell King.
  *
+ * Support for Dynamic Tick Timer Copyright (C) 2004-2005 Nokia Corporation.
+ * Dynamic Tick Timer written by Tony Lindgren <tony@atomide.com> and
+ * Tuukka Tikkanen <tuukka.tikkanen@elektrobit.com>.
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -37,6 +41,7 @@
 #include <asm/irq.h>
 #include <asm/system.h>
 #include <asm/mach/irq.h>
+#include <asm/mach/time.h>
 
 /*
  * Maximum IRQ count. Currently, this is arbitary. However, it should
@@ -329,6 +334,15 @@ __do_irq(unsigned int irq, struct irqaction *action, struct pt_regs *regs)
 
     spin_unlock(&irq_controller_lock);
 
+#ifdef CONFIG_NO_IDLE_HZ
+    if (!(action->flags & SA_TIMER) && system_timer->dyn_tick != NULL) {
+        write_seqlock(&xtime_lock);
+        if (system_timer->dyn_tick->state & DYN_TICK_ENABLED)
+            system_timer->dyn_tick->handler(irq, 0, regs);
+        write_sequnlock(&xtime_lock);
+    }
+#endif
+
     if (!(action->flags & SA_INTERRUPT))
         local_irq_enable();
 
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index c232f24f4a60..06054c9ba074 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -381,6 +381,95 @@ static struct sysdev_class timer_sysclass = {
     .resume = timer_resume,
 };
 
+#ifdef CONFIG_NO_IDLE_HZ
+static int timer_dyn_tick_enable(void)
+{
+    struct dyn_tick_timer *dyn_tick = system_timer->dyn_tick;
+    unsigned long flags;
+    int ret = -ENODEV;
+
+    if (dyn_tick) {
+        write_seqlock_irqsave(&xtime_lock, flags);
+        ret = 0;
+        if (!(dyn_tick->state & DYN_TICK_ENABLED)) {
+            ret = dyn_tick->enable();
+
+            if (ret == 0)
+                dyn_tick->state |= DYN_TICK_ENABLED;
+        }
+        write_sequnlock_irqrestore(&xtime_lock, flags);
+    }
+
+    return ret;
+}
+
+static int timer_dyn_tick_disable(void)
+{
+    struct dyn_tick_timer *dyn_tick = system_timer->dyn_tick;
+    unsigned long flags;
+    int ret = -ENODEV;
+
+    if (dyn_tick) {
+        write_seqlock_irqsave(&xtime_lock, flags);
+        ret = 0;
+        if (dyn_tick->state & DYN_TICK_ENABLED) {
+            ret = dyn_tick->disable();
+
+            if (ret == 0)
+                dyn_tick->state &= ~DYN_TICK_ENABLED;
+        }
+        write_sequnlock_irqrestore(&xtime_lock, flags);
+    }
+
+    return ret;
+}
+
+void timer_dyn_reprogram(void)
+{
+    struct dyn_tick_timer *dyn_tick = system_timer->dyn_tick;
+    unsigned long flags;
+
+    write_seqlock_irqsave(&xtime_lock, flags);
+    if (dyn_tick->state & DYN_TICK_ENABLED)
+        dyn_tick->reprogram(next_timer_interrupt() - jiffies);
+    write_sequnlock_irqrestore(&xtime_lock, flags);
+}
+
+static ssize_t timer_show_dyn_tick(struct sys_device *dev, char *buf)
+{
+    return sprintf(buf, "%i\n",
+                   (system_timer->dyn_tick->state & DYN_TICK_ENABLED) >> 1);
+}
+
+static ssize_t timer_set_dyn_tick(struct sys_device *dev, const char *buf,
+                                  size_t count)
+{
+    unsigned int enable = simple_strtoul(buf, NULL, 2);
+
+    if (enable)
+        timer_dyn_tick_enable();
+    else
+        timer_dyn_tick_disable();
+
+    return count;
+}
+static SYSDEV_ATTR(dyn_tick, 0644, timer_show_dyn_tick, timer_set_dyn_tick);
+
+/*
+ * dyntick=enable|disable
+ */
+static char dyntick_str[4] __initdata = "";
+
+static int __init dyntick_setup(char *str)
+{
+    if (str)
+        strlcpy(dyntick_str, str, sizeof(dyntick_str));
+    return 1;
+}
+
+__setup("dyntick=", dyntick_setup);
+#endif
+
 static int __init timer_init_sysfs(void)
 {
     int ret = sysdev_class_register(&timer_sysclass);
@@ -388,6 +477,20 @@ static int __init timer_init_sysfs(void)
         system_timer->dev.cls = &timer_sysclass;
         ret = sysdev_register(&system_timer->dev);
     }
+
+#ifdef CONFIG_NO_IDLE_HZ
+    if (ret == 0 && system_timer->dyn_tick) {
+        ret = sysdev_create_file(&system_timer->dev, &attr_dyn_tick);
+
+        /*
+         * Turn on dynamic tick after calibrate delay
+         * for correct bogomips
+         */
+        if (ret == 0 && dyntick_str[0] == 'e')
+            ret = timer_dyn_tick_enable();
+    }
+#endif
+
     return ret;
 }
 
diff --git a/arch/arm/mach-clps711x/Kconfig b/arch/arm/mach-clps711x/Kconfig
index 45c930ccd064..0793dcf54f2e 100644
--- a/arch/arm/mach-clps711x/Kconfig
+++ b/arch/arm/mach-clps711x/Kconfig
@@ -28,7 +28,7 @@ config ARCH_CLEP7312
 config ARCH_EDB7211
     bool "EDB7211"
     select ISA
-    select DISCONTIGMEM
+    select ARCH_DISCONTIGMEM_ENABLE
     help
       Say Y here if you intend to run this kernel on a Cirrus Logic
       EDB-7211 evaluation board.
diff --git a/arch/arm/mach-ixp2000/Kconfig b/arch/arm/mach-ixp2000/Kconfig
index 9361e05f6fa3..ecb58d83478e 100644
--- a/arch/arm/mach-ixp2000/Kconfig
+++ b/arch/arm/mach-ixp2000/Kconfig
@@ -54,6 +54,14 @@ config ARCH_IXDP2X01
     depends on ARCH_IXDP2401 || ARCH_IXDP2801
     default y
 
+config IXP2000_SUPPORT_BROKEN_PCI_IO
+    bool "Support broken PCI I/O on older IXP2000s"
+    default y
+    help
+      Say 'N' here if you only intend to run your kernel on an
+      IXP2000 B0 or later model and do not need the PCI I/O
+      byteswap workaround. Say 'Y' otherwise.
+
 endmenu
 
 endif
diff --git a/arch/arm/mach-ixp2000/enp2611.c b/arch/arm/mach-ixp2000/enp2611.c
index 04b38bcf9aac..f3a291b6a9fb 100644
--- a/arch/arm/mach-ixp2000/enp2611.c
+++ b/arch/arm/mach-ixp2000/enp2611.c
@@ -197,8 +197,23 @@ static struct platform_device enp2611_flash = {
     .resource = &enp2611_flash_resource,
 };
 
+static struct ixp2000_i2c_pins enp2611_i2c_gpio_pins = {
+    .sda_pin = ENP2611_GPIO_SDA,
+    .scl_pin = ENP2611_GPIO_SCL,
+};
+
+static struct platform_device enp2611_i2c_controller = {
+    .name = "IXP2000-I2C",
+    .id = 0,
+    .dev = {
+        .platform_data = &enp2611_i2c_gpio_pins
+    },
+    .num_resources = 0
+};
+
 static struct platform_device *enp2611_devices[] __initdata = {
-    &enp2611_flash
+    &enp2611_flash,
+    &enp2611_i2c_controller
 };
 
 static void __init enp2611_init_machine(void)
diff --git a/arch/arm/mach-ixp2000/ixdp2x00.c b/arch/arm/mach-ixp2000/ixdp2x00.c
index 21c41fe15b99..5e4380747b53 100644
--- a/arch/arm/mach-ixp2000/ixdp2x00.c
+++ b/arch/arm/mach-ixp2000/ixdp2x00.c
@@ -42,6 +42,9 @@
 #include <asm/mach/flash.h>
 #include <asm/mach/arch.h>
 
+#include <asm/arch/gpio.h>
+
+
 /*************************************************************************
  * IXDP2x00 IRQ Initialization
  *************************************************************************/
diff --git a/arch/arm/mach-ixp2000/pci.c b/arch/arm/mach-ixp2000/pci.c
index 5ff2f2718c58..0788fb2b5c10 100644
--- a/arch/arm/mach-ixp2000/pci.c
+++ b/arch/arm/mach-ixp2000/pci.c
@@ -198,6 +198,19 @@ clear_master_aborts(void)
 void __init ixp2000_pci_preinit(void)
 {
+#ifndef CONFIG_IXP2000_SUPPORT_BROKEN_PCI_IO
+    /*
+     * Configure the PCI unit to properly byteswap I/O transactions,
+     * and verify that it worked.
+     */
+    ixp2000_reg_write(IXP2000_PCI_CONTROL,
+                      (*IXP2000_PCI_CONTROL | PCI_CONTROL_IEE));
+
+    if ((*IXP2000_PCI_CONTROL & PCI_CONTROL_IEE) == 0)
+        panic("IXP2000: PCI I/O is broken on this ixp model, and "
+              "the needed workaround has not been configured in");
+#endif
+
     hook_fault_code(16+6, ixp2000_pci_abort_handler, SIGBUS,
         "PCI config cycle to non-existent device");
 }
diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c
index ef6865f0b979..767ebb55bd83 100644
--- a/arch/frv/kernel/setup.c
+++ b/arch/frv/kernel/setup.c
@@ -790,12 +790,10 @@ void __init setup_arch(char **cmdline_p)
 #ifndef CONFIG_GDBSTUB_UART0
     __reg(UART0_BASE + UART_IER * 8) = 0;
     early_serial_setup(&__frv_uart0);
-//  register_serial(&__frv_uart0);
 #endif
 
 #ifndef CONFIG_GDBSTUB_UART1
     __reg(UART1_BASE + UART_IER * 8) = 0;
     early_serial_setup(&__frv_uart1);
-//  register_serial(&__frv_uart1);
 #endif
 
 #if defined(CONFIG_CHR_DEV_FLASH) || defined(CONFIG_BLK_DEV_FLASH)
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index d4ae5f9ceae6..6c02336fe2e4 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -510,28 +510,7 @@ config SCHED_SMT
       cost of slightly increased overhead in some places. If unsure say
       N here.
 
-config PREEMPT
-    bool "Preemptible Kernel"
-    help
-      This option reduces the latency of the kernel when reacting to
-      real-time or interactive events by allowing a low priority process to
-      be preempted even if it is in kernel mode executing a system call.
-      This allows applications to run more reliably even when the system is
-      under load.
-
-      Say Y here if you are building a kernel for a desktop, embedded
-      or real-time system. Say N if you are unsure.
-
-config PREEMPT_BKL
-    bool "Preempt The Big Kernel Lock"
-    depends on PREEMPT
-    default y
-    help
-      This option reduces the latency of the kernel by making the
-      big kernel lock preemptible.
-
-      Say Y here if you are building a kernel for a desktop system.
-      Say N if you are unsure.
+source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
     bool "Local APIC support on uniprocessors"
@@ -963,6 +942,41 @@ config SECCOMP
 
 source kernel/Kconfig.hz
 
+config PHYSICAL_START
+    hex "Physical address where the kernel is loaded" if EMBEDDED
+    default "0x100000"
+    help
+      This gives the physical address where the kernel is loaded.
+      Primarily used in the case of kexec on panic where the
+      fail safe kernel needs to run at a different address than
+      the panic-ed kernel.
+
+      Don't change this unless you know what you are doing.
+
+config KEXEC
+    bool "kexec system call (EXPERIMENTAL)"
+    depends on EXPERIMENTAL
+    help
+      kexec is a system call that implements the ability to shutdown your
+      current kernel, and to start another kernel. It is like a reboot
+      but it is indepedent of the system firmware. And like a reboot
+      you can start any kernel with it, not just Linux.
+
+      The name comes from the similiarity to the exec system call.
+
+      It is an ongoing process to be certain the hardware in a machine
+      is properly shutdown, so do not be surprised if this code does not
+      initially work for you. It may help to enable device hotplugging
+      support. As of this writing the exact hardware interface is
+      strongly in flux, so no good recommendation can be made.
+
+config CRASH_DUMP
+    bool "kernel crash dumps (EXPERIMENTAL)"
+    depends on EMBEDDED
+    depends on EXPERIMENTAL
+    depends on HIGHMEM
+    help
+      Generate crash dump after being started by kexec.
 
 endmenu
 
@@ -1250,6 +1264,15 @@ config SCx200
       This support is also available as a module. If compiled as a
       module, it will be called scx200.
 
+config HOTPLUG_CPU
+    bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
+    depends on SMP && HOTPLUG && EXPERIMENTAL
+    ---help---
+      Say Y here to experiment with turning CPUs off and on. CPUs
+      can be controlled through /sys/devices/system/cpu.
+
+      Say N.
+
 source "drivers/pcmcia/Kconfig"
 
 source "drivers/pci/hotplug/Kconfig"
 
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile
index 43cd6220ee49..1e71382d413a 100644
--- a/arch/i386/boot/Makefile
+++ b/arch/i386/boot/Makefile
@@ -25,8 +25,8 @@ SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
 
 #RAMDISK := -DRAMDISK=512
 
-targets := vmlinux.bin bootsect bootsect.o setup setup.o \
-           zImage bzImage
+targets := vmlinux.bin bootsect bootsect.o \
+           setup setup.o zImage bzImage
 subdir- := compressed
 
 hostprogs-y := tools/build
 
diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S
index c5e80b69e7d4..b5893e4ecd37 100644
--- a/arch/i386/boot/compressed/head.S
+++ b/arch/i386/boot/compressed/head.S
@@ -25,6 +25,7 @@
 
 #include <linux/linkage.h>
 #include <asm/segment.h>
+#include <asm/page.h>
 
 .globl startup_32
 
@@ -74,7 +75,7 @@ startup_32:
     popl %esi   # discard address
     popl %esi   # real mode pointer
     xorl %ebx,%ebx
-   ljmp $(__BOOT_CS), $0x100000
+   ljmp $(__BOOT_CS), $__PHYSICAL_START
 
 /*
  * We come here, if we were loaded high.
@@ -99,7 +100,7 @@ startup_32: popl %ecx # lcount popl %edx # high_buffer_start popl %eax # hcount - movl $0x100000,%edi + movl $__PHYSICAL_START,%edi cli # make sure we don't get interrupted ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine @@ -124,5 +125,5 @@ move_routine_start: movsl movl %ebx,%esi # Restore setup pointer xorl %ebx,%ebx - ljmp $(__BOOT_CS), $0x100000 + ljmp $(__BOOT_CS), $__PHYSICAL_START move_routine_end: diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c index cedc55cc47de..82a807f9f5e6 100644 --- a/arch/i386/boot/compressed/misc.c +++ b/arch/i386/boot/compressed/misc.c @@ -13,6 +13,7 @@ #include <linux/vmalloc.h> #include <linux/tty.h> #include <asm/io.h> +#include <asm/page.h> /* * gzip declarations @@ -308,7 +309,7 @@ static void setup_normal_output_buffer(void) #else if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory"); #endif - output_data = (char *)0x100000; /* Points to 1M */ + output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */ free_mem_end_ptr = (long)real_mode; } @@ -333,8 +334,8 @@ static void setup_output_buffer_if_we_run_high(struct moveparams *mv) low_buffer_size = low_buffer_end - LOW_BUFFER_START; high_loaded = 1; free_mem_end_ptr = (long)high_buffer_start; - if ( (0x100000 + low_buffer_size) > ((ulg)high_buffer_start)) { - high_buffer_start = (uch *)(0x100000 + low_buffer_size); + if ( (__PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) { + high_buffer_start = (uch *)(__PHYSICAL_START + low_buffer_size); mv->hcount = 0; /* say: we need not to move high_buffer */ } else mv->hcount = -1; @@ -353,7 +354,6 @@ static void close_output_buffer_if_we_run_high(struct moveparams *mv) } } - asmlinkage int decompress_kernel(struct moveparams *mv, void *rmode) { real_mode = rmode; diff --git a/arch/i386/boot/edd.S b/arch/i386/boot/edd.S index 027d6b354ffb..d8d69f2b911d 100644 --- a/arch/i386/boot/edd.S +++ b/arch/i386/boot/edd.S @@ -6,7 +6,7 @@ * projects 1572D, 1484D, 1386D, 1226DT * disk signature read by Matt Domsch <Matt_Domsch@dell.com> * and Andrew Wilks <Andrew_Wilks@dell.com> September 2003, June 2004 - * legacy CHS retreival by Patrick J. LoPresti <patl@users.sourceforge.net> + * legacy CHS retrieval by Patrick J. LoPresti <patl@users.sourceforge.net> * March 2004 * Command line option parsing, Matt Domsch, November 2004 */ diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S index caa1fde6904e..8cb420f40c58 100644 --- a/arch/i386/boot/setup.S +++ b/arch/i386/boot/setup.S @@ -33,7 +33,7 @@ * Transcribed from Intel (as86) -> AT&T (gas) by Chris Noe, May 1999. * <stiker@northlink.com> * - * Fix to work around buggy BIOSes which dont use carry bit correctly + * Fix to work around buggy BIOSes which don't use carry bit correctly * and/or report extended memory in CX/DX for e801h memory size detection * call. As a result the kernel got wrong figures. The int15/e801h docs * from Ralf Brown interrupt list seem to indicate AX/BX should be used @@ -357,7 +357,7 @@ bail820: meme801: stc # fix to work around buggy - xorw %cx,%cx # BIOSes which dont clear/set + xorw %cx,%cx # BIOSes which don't clear/set xorw %dx,%dx # carry on pass/error of # e801h memory size call # or merely pass cx,dx though @@ -847,7 +847,7 @@ flush_instr: # # but we yet haven't reloaded the CS register, so the default size # of the target offset still is 16 bit. 
-# However, using an operand prefix (0x66), the CPU will properly +# However, using an operand prefix (0x66), the CPU will properly # take our 48 bit far pointer. (INTeL 80386 Programmer's Reference # Manual, Mixing 16-bit and 32-bit code, page 16-6) diff --git a/arch/i386/boot/tools/build.c b/arch/i386/boot/tools/build.c index 26509b826aed..4a17956512e1 100644 --- a/arch/i386/boot/tools/build.c +++ b/arch/i386/boot/tools/build.c @@ -1,6 +1,4 @@ /* - * $Id: build.c,v 1.5 1997/05/19 12:29:58 mj Exp $ - * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1997 Martin Mares */ @@ -8,7 +6,8 @@ /* * This file builds a disk-image from three different files: * - * - bootsect: exactly 512 bytes of 8086 machine code, loads the rest + * - bootsect: compatibility mbr which prints an error message if + * someone tries to boot the kernel directly. * - setup: 8086 machine code, sets up system parm * - system: 80386 code for actual system * diff --git a/arch/i386/crypto/aes.c b/arch/i386/crypto/aes.c index 1019430fc1f1..88ee85c3b43b 100644 --- a/arch/i386/crypto/aes.c +++ b/arch/i386/crypto/aes.c @@ -59,7 +59,7 @@ struct aes_ctx { }; #define WPOLY 0x011b -#define u32_in(x) le32_to_cpu(*(const u32 *)(x)) +#define u32_in(x) le32_to_cpup((const __le32 *)(x)) #define bytes2word(b0, b1, b2, b3) \ (((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0)) diff --git a/arch/i386/defconfig b/arch/i386/defconfig index 28e620383799..ca07b95c06b8 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -126,7 +126,6 @@ CONFIG_HAVE_DEC_LOCK=y # CONFIG_PM=y CONFIG_SOFTWARE_SUSPEND=y -# CONFIG_PM_DISK is not set # # ACPI (Advanced Configuration and Power Interface) Support diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 51ecd512603d..4cc83b322b36 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups.o +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o obj-$(CONFIG_X86_NUMAQ) += numaq.o obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o obj-$(CONFIG_KPROBES) += kprobes.o diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 848bb97af7ca..9f63ae0f404b 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -29,6 +29,7 @@ #include <linux/efi.h> #include <linux/irq.h> #include <linux/module.h> +#include <linux/dmi.h> #include <asm/pgtable.h> #include <asm/io_apic.h> @@ -815,6 +816,219 @@ acpi_process_madt(void) return; } +extern int acpi_force; + +#ifdef __i386__ + +#ifdef CONFIG_ACPI_PCI +static int __init disable_acpi_irq(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n", + d->ident); + acpi_noirq_set(); + } + return 0; +} + +static int __init disable_acpi_pci(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n", + d->ident); + acpi_disable_pci(); + } + return 0; +} +#endif + +static int __init dmi_disable_acpi(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: acpi off\n",d->ident); + disable_acpi(); + } else { + printk(KERN_NOTICE + "Warning: DMI blacklist says broken, but acpi forced\n"); + } + return 0; +} + +/* + * Limit ACPI to CPU enumeration for HT + */ +static int __init force_acpi_ht(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s 
detected: force use of acpi=ht\n", d->ident); + disable_acpi(); + acpi_ht = 1; + } else { + printk(KERN_NOTICE + "Warning: acpi=force overrules DMI blacklist: acpi=ht\n"); + } + return 0; +} + +/* + * If your system is blacklisted here, but you find that acpi=force + * works for you, please contact acpi-devel@sourceforge.net + */ +static struct dmi_system_id __initdata acpi_dmi_table[] = { + /* + * Boxes that need ACPI disabled + */ + { + .callback = dmi_disable_acpi, + .ident = "IBM Thinkpad", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "2629H1G"), + }, + }, + + /* + * Boxes that need acpi=ht + */ + { + .callback = force_acpi_ht, + .ident = "FSC Primergy T850", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "DELL GX240", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "HP VISUALIZE NT Workstation", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "Compaq Workstation W8000", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), + DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ASUS P4B266", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P4B266"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ASUS P2B-DS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ASUS CUR-DLS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ABIT i440BX-W83977", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"), + DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM Bladecenter", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM eServer xSeries 360", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM eserver xSeries 330", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM eserver xSeries 440", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"), + }, + }, + +#ifdef CONFIG_ACPI_PCI + /* + * Boxes that need ACPI PCI IRQ routing disabled + */ + { + .callback = disable_acpi_irq, + .ident = "ASUS A7V", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"), + DMI_MATCH(DMI_BOARD_NAME, "<A7V>"), + /* newer BIOS, Revision 1011, does work */ + DMI_MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"), + }, + }, + + /* + * Boxes that need ACPI PCI IRQ routing and PCI scan disabled + */ + { /* _BBN 0 bug */ + .callback = disable_acpi_pci, + .ident = "ASUS PR-DLS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"), + 
DMI_MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"), + DMI_MATCH(DMI_BIOS_DATE, "03/21/2003") + }, + }, + { + .callback = disable_acpi_pci, + .ident = "Acer TravelMate 36x Laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), + }, + }, +#endif + { } +}; + +#endif /* __i386__ */ + /* * acpi_boot_table_init() and acpi_boot_init() * called from setup_arch(), always. @@ -843,6 +1057,10 @@ acpi_boot_table_init(void) { int error; +#ifdef __i386__ + dmi_check_system(acpi_dmi_table); +#endif + /* * If acpi_disabled, bail out * One exception: acpi=ht continues far enough to enumerate LAPICs @@ -870,8 +1088,6 @@ acpi_boot_table_init(void) */ error = acpi_blacklisted(); if (error) { - extern int acpi_force; - if (acpi_force) { printk(KERN_WARNING PREFIX "acpi=force override\n"); } else { diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c index 28bb0514bb6e..c1af93032ff3 100644 --- a/arch/i386/kernel/acpi/sleep.c +++ b/arch/i386/kernel/acpi/sleep.c @@ -7,6 +7,7 @@ #include <linux/acpi.h> #include <linux/bootmem.h> +#include <linux/dmi.h> #include <asm/smp.h> #include <asm/tlbflush.h> @@ -91,3 +92,29 @@ static int __init acpi_sleep_setup(char *str) __setup("acpi_sleep=", acpi_sleep_setup); + + +static __init int reset_videomode_after_s3(struct dmi_system_id *d) +{ + acpi_video_flags |= 2; + return 0; +} + +static __initdata struct dmi_system_id acpisleep_dmi_table[] = { + { /* Reset video mode after returning from ACPI S3 sleep */ + .callback = reset_videomode_after_s3, + .ident = "Toshiba Satellite 4030cdt", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), + }, + }, + { } +}; + +static int __init acpisleep_dmi_init(void) +{ + dmi_check_system(acpisleep_dmi_table); + return 0; +} + +core_initcall(acpisleep_dmi_init); diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 8d993fa71754..93df90bbb87e 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -26,6 +26,7 @@ #include <linux/mc146818rtc.h> #include <linux/kernel_stat.h> #include <linux/sysdev.h> +#include <linux/cpu.h> #include <asm/atomic.h> #include <asm/smp.h> @@ -40,6 +41,11 @@ #include "io_ports.h" /* + * Knob to control our willingness to enable the local APIC. 
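The hunks above move the ACPI DMI blacklist out of dmi_scan.c and into acpi/boot.c and acpi/sleep.c, expressed as dmi_system_id tables handed to dmi_check_system(). For readers new to that pattern, here is a minimal standalone sketch of a match-and-callback table; the struct layout, the exact-string comparison, and every name below are simplified stand-ins invented for the example (the kernel's matcher works on substrings of the strings saved by the DMI scanner).

/* Illustrative sketch only: a simplified version of the "table of
 * matches plus callback" pattern used by dmi_check_system().  The
 * struct layout and helper names here are invented for the example. */
#include <stdio.h>
#include <string.h>

struct quirk_match {
    const char *ident;                    /* human-readable name      */
    const char *board_vendor;             /* NULL means "don't care"  */
    const char *board_name;
    int (*callback)(const struct quirk_match *m);
};

static int disable_acpi_cb(const struct quirk_match *m)
{
    printf("%s detected: acpi off\n", m->ident);
    return 0;
}

static const struct quirk_match quirks[] = {
    { "IBM Thinkpad 2629H1G", "IBM", "2629H1G", disable_acpi_cb },
    { NULL, NULL, NULL, NULL }   /* terminator, like the empty { } entry */
};

/* Walk the table and fire the callback for every entry whose
 * non-NULL fields all match the strings probed from firmware. */
static int check_quirks(const char *vendor, const char *board)
{
    int hits = 0;
    for (const struct quirk_match *m = quirks; m->ident; m++) {
        if (m->board_vendor && strcmp(m->board_vendor, vendor))
            continue;
        if (m->board_name && strcmp(m->board_name, board))
            continue;
        m->callback(m);
        hits++;
    }
    return hits;
}

int main(void)
{
    return check_quirks("IBM", "2629H1G") ? 0 : 1;
}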
+ */ +int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ + +/* * Debug level */ int apic_verbosity; @@ -205,7 +211,7 @@ void __init connect_bsp_APIC(void) enable_apic_mode(); } -void disconnect_bsp_APIC(void) +void disconnect_bsp_APIC(int virt_wire_setup) { if (pic_mode) { /* @@ -219,6 +225,42 @@ void disconnect_bsp_APIC(void) outb(0x70, 0x22); outb(0x00, 0x23); } + else { + /* Go back to Virtual Wire compatibility mode */ + unsigned long value; + + /* For the spurious interrupt use vector F, and enable it */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + value |= 0xf; + apic_write_around(APIC_SPIV, value); + + if (!virt_wire_setup) { + /* For LVT0 make it edge triggered, active high, external and enabled */ + value = apic_read(APIC_LVT0); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); + apic_write_around(APIC_LVT0, value); + } + else { + /* Disable LVT0 */ + apic_write_around(APIC_LVT0, APIC_LVT_MASKED); + } + + /* For LVT1 make it edge triggered, active high, nmi and enabled */ + value = apic_read(APIC_LVT1); + value &= ~( + APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); + apic_write_around(APIC_LVT1, value); + } } void disable_local_APIC(void) @@ -363,7 +405,7 @@ void __init init_bsp_APIC(void) apic_write_around(APIC_LVT1, value); } -void __init setup_local_APIC (void) +void __devinit setup_local_APIC(void) { unsigned long oldvalue, value, ver, maxlvt; @@ -634,7 +676,7 @@ static struct sys_device device_lapic = { .cls = &lapic_sysclass, }; -static void __init apic_pm_activate(void) +static void __devinit apic_pm_activate(void) { apic_pm_state.active = 1; } @@ -665,26 +707,6 @@ static void apic_pm_activate(void) { } * Original code written by Keir Fraser. */ -/* - * Knob to control our willingness to enable the local APIC. - */ -int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ - -static int __init lapic_disable(char *str) -{ - enable_local_apic = -1; - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); - return 0; -} -__setup("nolapic", lapic_disable); - -static int __init lapic_enable(char *str) -{ - enable_local_apic = 1; - return 0; -} -__setup("lapic", lapic_enable); - static int __init apic_set_verbosity(char *str) { if (strcmp("debug", str) == 0) @@ -855,7 +877,7 @@ fake_ioapic_page: * but we do not accept timer interrupts yet. We only allow the BP * to calibrate. */ -static unsigned int __init get_8254_timer_count(void) +static unsigned int __devinit get_8254_timer_count(void) { extern spinlock_t i8253_lock; unsigned long flags; @@ -874,7 +896,7 @@ static unsigned int __init get_8254_timer_count(void) } /* next tick in 8254 can be caught by catching timer wraparound */ -static void __init wait_8254_wraparound(void) +static void __devinit wait_8254_wraparound(void) { unsigned int curr_count, prev_count; @@ -894,7 +916,7 @@ static void __init wait_8254_wraparound(void) * Default initialization for 8254 timers. 
If we use other timers like HPET, * we override this later */ -void (*wait_timer_tick)(void) __initdata = wait_8254_wraparound; +void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound; /* * This function sets up the local APIC timer, with a timeout of @@ -930,7 +952,7 @@ static void __setup_APIC_LVTT(unsigned int clocks) apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); } -static void __init setup_APIC_timer(unsigned int clocks) +static void __devinit setup_APIC_timer(unsigned int clocks) { unsigned long flags; @@ -1043,12 +1065,12 @@ void __init setup_boot_APIC_clock(void) local_irq_enable(); } -void __init setup_secondary_APIC_clock(void) +void __devinit setup_secondary_APIC_clock(void) { setup_APIC_timer(calibration_result); } -void __init disable_APIC_timer(void) +void __devinit disable_APIC_timer(void) { if (using_apic_timer) { unsigned long v; diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 0ff65abcd56c..d48ce9290963 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -346,10 +346,10 @@ extern int (*console_blank_hook)(int); struct apm_user { int magic; struct apm_user * next; - int suser: 1; - int writer: 1; - int reader: 1; - int suspend_wait: 1; + unsigned int suser: 1; + unsigned int writer: 1; + unsigned int reader: 1; + unsigned int suspend_wait: 1; int suspend_result; int suspends_pending; int standbys_pending; diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index b9954248d0aa..2203a9d20212 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -24,9 +24,9 @@ EXPORT_PER_CPU_SYMBOL(cpu_gdt_table); DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); -static int cachesize_override __initdata = -1; -static int disable_x86_fxsr __initdata = 0; -static int disable_x86_serial_nr __initdata = 1; +static int cachesize_override __devinitdata = -1; +static int disable_x86_fxsr __devinitdata = 0; +static int disable_x86_serial_nr __devinitdata = 1; struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; @@ -59,7 +59,7 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -int __init get_model_name(struct cpuinfo_x86 *c) +int __devinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; char *p, *q; @@ -89,7 +89,7 @@ int __init get_model_name(struct cpuinfo_x86 *c) } -void __init display_cacheinfo(struct cpuinfo_x86 *c) +void __devinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ecx, edx, l2size; @@ -130,7 +130,7 @@ void __init display_cacheinfo(struct cpuinfo_x86 *c) /* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ /* Look up CPU names by table lookup. 
*/ -static char __init *table_lookup_model(struct cpuinfo_x86 *c) +static char __devinit *table_lookup_model(struct cpuinfo_x86 *c) { struct cpu_model_info *info; @@ -151,7 +151,7 @@ static char __init *table_lookup_model(struct cpuinfo_x86 *c) } -void __init get_cpu_vendor(struct cpuinfo_x86 *c, int early) +void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) { char *v = c->x86_vendor_id; int i; @@ -202,7 +202,7 @@ static inline int flag_is_changeable_p(u32 flag) /* Probe for the CPUID instruction */ -static int __init have_cpuid_p(void) +static int __devinit have_cpuid_p(void) { return flag_is_changeable_p(X86_EFLAGS_ID); } @@ -249,7 +249,7 @@ static void __init early_cpu_detect(void) #endif } -void __init generic_identify(struct cpuinfo_x86 * c) +void __devinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; int junk; @@ -296,7 +296,7 @@ void __init generic_identify(struct cpuinfo_x86 * c) } } -static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { /* Disable processor serial number */ @@ -324,7 +324,7 @@ __setup("serialnumber", x86_serial_nr_setup); /* * This does the hard work of actually picking apart the CPU stuff... */ -void __init identify_cpu(struct cpuinfo_x86 *c) +void __devinit identify_cpu(struct cpuinfo_x86 *c) { int i; @@ -432,10 +432,13 @@ void __init identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_MCE mcheck_init(c); #endif + if (c == &boot_cpu_data) + sysenter_setup(); + enable_sep_cpu(); } #ifdef CONFIG_X86_HT -void __init detect_ht(struct cpuinfo_x86 *c) +void __devinit detect_ht(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; int index_msb, tmp; @@ -490,7 +493,7 @@ void __init detect_ht(struct cpuinfo_x86 *c) } #endif -void __init print_cpu_info(struct cpuinfo_x86 *c) +void __devinit print_cpu_info(struct cpuinfo_x86 *c) { char *vendor = NULL; @@ -513,7 +516,7 @@ void __init print_cpu_info(struct cpuinfo_x86 *c) printk("\n"); } -cpumask_t cpu_initialized __initdata = CPU_MASK_NONE; +cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE; /* This is hacky. :) * We're emulating future behavior. @@ -560,7 +563,7 @@ void __init early_cpu_init(void) * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. 
*/ -void __init cpu_init (void) +void __devinit cpu_init(void) { int cpu = smp_processor_id(); struct tss_struct * t = &per_cpu(init_tss, cpu); @@ -648,3 +651,15 @@ void __init cpu_init (void) clear_used_math(); mxcsr_feature_mask_init(); } + +#ifdef CONFIG_HOTPLUG_CPU +void __devinit cpu_uninit(void) +{ + int cpu = raw_smp_processor_id(); + cpu_clear(cpu, cpu_initialized); + + /* lazy TLB state */ + per_cpu(cpu_tlbstate, cpu).state = 0; + per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; +} +#endif diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c index 5c530064eb74..73a5dc5b26b8 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c @@ -648,9 +648,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) { } #endif - if (powernow_table) - kfree(powernow_table); - + kfree(powernow_table); return 0; } diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 121aa2176e69..96a75d045835 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -28,7 +28,7 @@ extern int trap_init_f00f_bug(void); struct movsl_mask movsl_mask; #endif -void __init early_intel_workaround(struct cpuinfo_x86 *c) +void __devinit early_intel_workaround(struct cpuinfo_x86 *c) { if (c->x86_vendor != X86_VENDOR_INTEL) return; @@ -43,7 +43,7 @@ void __init early_intel_workaround(struct cpuinfo_x86 *c) * This is called before we do cpu ident work */ -int __init ppro_with_ram_bug(void) +int __devinit ppro_with_ram_bug(void) { /* Uses data from early_cpu_detect now */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && @@ -61,7 +61,7 @@ int __init ppro_with_ram_bug(void) * P4 Xeon errata 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. 
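Many of the __init markings in this file become __devinit because, with CPU hotplug, the identify/setup paths can run again long after the kernel has freed its init sections. Below is a rough sketch of how such annotations are typically wired up; the macro names are stand-ins, MY_HOTPLUG is an invented placeholder for the kernel's hotplug config option, and the authoritative definitions live in include/linux/init.h of the matching tree.

/* Rough sketch with stand-in names: __init-style text is collected in a
 * discardable section; the __devinit variant only collapses to that
 * when hotplug support is compiled out. */
#define my_init       __attribute__((__section__(".init.text")))
#define my_initdata   __attribute__((__section__(".init.data")))

#ifdef MY_HOTPLUG            /* stand-in for the kernel's hotplug option */
#define my_devinit           /* must stay resident: a CPU may appear later */
#define my_devinitdata
#else
#define my_devinit     my_init
#define my_devinitdata my_initdata
#endif

/* With hotplug enabled this survives past boot, so the CPU bring-up
 * path can still reach it; without hotplug it is freed with init. */
static void my_devinit example_identify_cpu(void) { /* ... */ }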
*/ -static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c) +static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c) { unsigned long lo, hi; @@ -80,7 +80,7 @@ static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c) /* * find out the number of processor cores on the die */ -static int __init num_cpu_cores(struct cpuinfo_x86 *c) +static int __devinit num_cpu_cores(struct cpuinfo_x86 *c) { unsigned int eax; @@ -98,7 +98,7 @@ static int __init num_cpu_cores(struct cpuinfo_x86 *c) return 1; } -static void __init init_intel(struct cpuinfo_x86 *c) +static void __devinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; char *p = NULL; @@ -204,7 +204,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) return size; } -static struct cpu_dev intel_cpu_dev __initdata = { +static struct cpu_dev intel_cpu_dev __devinitdata = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, .c_models = { diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index a710dc4eb20e..1d768b263269 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -28,7 +28,7 @@ struct _cache_table }; /* all the cache descriptor types we care about (no TLB or trace cache entries) */ -static struct _cache_table cache_table[] __initdata = +static struct _cache_table cache_table[] __devinitdata = { { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ @@ -160,7 +160,7 @@ static int __init find_num_cache_leaves(void) return retval; } -unsigned int __init init_intel_cacheinfo(struct cpuinfo_x86 *c) +unsigned int __devinit init_intel_cacheinfo(struct cpuinfo_x86 *c) { unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c index 8df52e86c4d2..c4abe7657397 100644 --- a/arch/i386/kernel/cpu/mcheck/k7.c +++ b/arch/i386/kernel/cpu/mcheck/k7.c @@ -69,7 +69,7 @@ static fastcall void k7_machine_check(struct pt_regs * regs, long error_code) /* AMD K7 machine check is Intel like */ -void __init amd_mcheck_init(struct cpuinfo_x86 *c) +void __devinit amd_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c index bf6d1aefafc0..2cf25d2ba0f1 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.c +++ b/arch/i386/kernel/cpu/mcheck/mce.c @@ -16,7 +16,7 @@ #include "mce.h" -int mce_disabled __initdata = 0; +int mce_disabled __devinitdata = 0; int nr_mce_banks; EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ @@ -31,7 +31,7 @@ static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_ void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; /* This has to be run for each processor */ -void __init mcheck_init(struct cpuinfo_x86 *c) +void __devinit mcheck_init(struct cpuinfo_x86 *c) { if (mce_disabled==1) return; diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c index 8b16ceb929b4..0abccb6fdf9e 100644 --- a/arch/i386/kernel/cpu/mcheck/p4.c +++ b/arch/i386/kernel/cpu/mcheck/p4.c @@ -78,7 +78,7 @@ fastcall void smp_thermal_interrupt(struct pt_regs *regs) } /* P4/Xeon Thermal regulation detect and init */ -static void __init intel_init_thermal(struct cpuinfo_x86 *c) +static void __devinit 
intel_init_thermal(struct cpuinfo_x86 *c) { u32 l, h; unsigned int cpu = smp_processor_id(); @@ -232,7 +232,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) } -void __init intel_p4_mcheck_init(struct cpuinfo_x86 *c) +void __devinit intel_p4_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; diff --git a/arch/i386/kernel/cpu/mcheck/p5.c b/arch/i386/kernel/cpu/mcheck/p5.c index c45a1b485c80..ec0614cd2925 100644 --- a/arch/i386/kernel/cpu/mcheck/p5.c +++ b/arch/i386/kernel/cpu/mcheck/p5.c @@ -29,7 +29,7 @@ static fastcall void pentium_machine_check(struct pt_regs * regs, long error_cod } /* Set up machine check reporting for processors with Intel style MCE */ -void __init intel_p5_mcheck_init(struct cpuinfo_x86 *c) +void __devinit intel_p5_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c index 46640f8c2494..f01b73f947e1 100644 --- a/arch/i386/kernel/cpu/mcheck/p6.c +++ b/arch/i386/kernel/cpu/mcheck/p6.c @@ -80,7 +80,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) } /* Set up machine check reporting for processors with Intel style MCE */ -void __init intel_p6_mcheck_init(struct cpuinfo_x86 *c) +void __devinit intel_p6_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; diff --git a/arch/i386/kernel/cpu/mcheck/winchip.c b/arch/i386/kernel/cpu/mcheck/winchip.c index 753fa7acb984..7bae68fa168f 100644 --- a/arch/i386/kernel/cpu/mcheck/winchip.c +++ b/arch/i386/kernel/cpu/mcheck/winchip.c @@ -23,7 +23,7 @@ static fastcall void winchip_machine_check(struct pt_regs * regs, long error_cod } /* Set up machine check reporting on the Winchip C6 series */ -void __init winchip_mcheck_init(struct cpuinfo_x86 *c) +void __devinit winchip_mcheck_init(struct cpuinfo_x86 *c) { u32 lo, hi; machine_check_vector = winchip_machine_check; diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c index f468a979e9aa..64d91f73a0a4 100644 --- a/arch/i386/kernel/cpu/mtrr/generic.c +++ b/arch/i386/kernel/cpu/mtrr/generic.c @@ -70,8 +70,7 @@ void __init get_mtrr_state(void) /* Free resources associated with a struct mtrr_state */ void __init finalize_mtrr_state(void) { - if (mtrr_state.var_ranges) - kfree(mtrr_state.var_ranges); + kfree(mtrr_state.var_ranges); mtrr_state.var_ranges = NULL; } diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c new file mode 100644 index 000000000000..e5fab12f7926 --- /dev/null +++ b/arch/i386/kernel/crash.c @@ -0,0 +1,223 @@ +/* + * Architecture specific (i386) functions for kexec based crash dumps. + * + * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) + * + * Copyright (C) IBM Corporation, 2004. All rights reserved. + * + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/irq.h> +#include <linux/reboot.h> +#include <linux/kexec.h> +#include <linux/irq.h> +#include <linux/delay.h> +#include <linux/elf.h> +#include <linux/elfcore.h> + +#include <asm/processor.h> +#include <asm/hardirq.h> +#include <asm/nmi.h> +#include <asm/hw_irq.h> +#include <asm/apic.h> +#include <mach_ipi.h> + + +note_buf_t crash_notes[NR_CPUS]; +/* This keeps a track of which one is crashing cpu. 
*/ +static int crashing_cpu; + +static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, + size_t data_len) +{ + struct elf_note note; + + note.n_namesz = strlen(name) + 1; + note.n_descsz = data_len; + note.n_type = type; + memcpy(buf, ¬e, sizeof(note)); + buf += (sizeof(note) +3)/4; + memcpy(buf, name, note.n_namesz); + buf += (note.n_namesz + 3)/4; + memcpy(buf, data, note.n_descsz); + buf += (note.n_descsz + 3)/4; + + return buf; +} + +static void final_note(u32 *buf) +{ + struct elf_note note; + + note.n_namesz = 0; + note.n_descsz = 0; + note.n_type = 0; + memcpy(buf, ¬e, sizeof(note)); +} + +static void crash_save_this_cpu(struct pt_regs *regs, int cpu) +{ + struct elf_prstatus prstatus; + u32 *buf; + + if ((cpu < 0) || (cpu >= NR_CPUS)) + return; + + /* Using ELF notes here is opportunistic. + * I need a well defined structure format + * for the data I pass, and I need tags + * on the data to indicate what information I have + * squirrelled away. ELF notes happen to provide + * all of that that no need to invent something new. + */ + buf = &crash_notes[cpu][0]; + memset(&prstatus, 0, sizeof(prstatus)); + prstatus.pr_pid = current->pid; + elf_core_copy_regs(&prstatus.pr_reg, regs); + buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, + sizeof(prstatus)); + final_note(buf); +} + +static void crash_get_current_regs(struct pt_regs *regs) +{ + __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx)); + __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx)); + __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx)); + __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi)); + __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi)); + __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp)); + __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax)); + __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp)); + __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss)); + __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs)); + __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds)); + __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes)); + __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags)); + + regs->eip = (unsigned long)current_text_addr(); +} + +/* CPU does not save ss and esp on stack if execution is already + * running in kernel mode at the time of NMI occurrence. This code + * fixes it. + */ +static void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs) +{ + memcpy(newregs, oldregs, sizeof(*newregs)); + newregs->esp = (unsigned long)&(oldregs->esp); + __asm__ __volatile__("xorl %eax, %eax;"); + __asm__ __volatile__ ("movw %%ss, %%ax;" :"=a"(newregs->xss)); +} + +/* We may have saved_regs from where the error came from + * or it is NULL if via a direct panic(). + */ +static void crash_save_self(struct pt_regs *saved_regs) +{ + struct pt_regs regs; + int cpu; + + cpu = smp_processor_id(); + if (saved_regs) + crash_setup_regs(®s, saved_regs); + else + crash_get_current_regs(®s); + crash_save_this_cpu(®s, cpu); +} + +#ifdef CONFIG_SMP +static atomic_t waiting_for_crash_ipi; + +static int crash_nmi_callback(struct pt_regs *regs, int cpu) +{ + struct pt_regs fixed_regs; + + /* Don't do anything if this handler is invoked on crashing cpu. + * Otherwise, system will completely hang. Crashing cpu can get + * an NMI if system was initially booted with nmi_watchdog parameter. 
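The new crash.c records per-CPU register state as ELF notes so a dump-capture kernel can later stitch them into a core file. The sketch below is an ordinary userspace program showing just the layout rule append_elf_note() depends on: a 12-byte header, then the name, then the payload, each rounded up to a 4-byte boundary, with an all-zero header as terminator. The buffer size and the fake payload are invented for the example.

/* Standalone sketch of the note layout crash_notes[] relies on. */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct note_hdr {           /* mirrors struct elf_note */
    uint32_t n_namesz;
    uint32_t n_descsz;
    uint32_t n_type;
};

static uint32_t *append_note(uint32_t *buf, const char *name,
                             uint32_t type, const void *data, size_t len)
{
    struct note_hdr hdr;

    hdr.n_namesz = (uint32_t)strlen(name) + 1;   /* include the NUL */
    hdr.n_descsz = (uint32_t)len;
    hdr.n_type   = type;

    memcpy(buf, &hdr, sizeof(hdr));
    buf += (sizeof(hdr) + 3) / 4;                /* round up to u32 units */
    memcpy(buf, name, hdr.n_namesz);
    buf += (hdr.n_namesz + 3) / 4;
    memcpy(buf, data, len);
    buf += (len + 3) / 4;
    return buf;
}

int main(void)
{
    uint32_t notes[256] = { 0 };
    uint32_t regs[8] = { 1, 2, 3, 4, 5, 6, 7, 8 }; /* stand-in for prstatus */
    uint32_t *end;

    end = append_note(notes, "CORE", 1 /* NT_PRSTATUS */, regs, sizeof(regs));
    memset(end, 0, sizeof(struct note_hdr));       /* empty header terminates */
    printf("used %zu bytes\n",
           (size_t)((char *)end - (char *)notes) + sizeof(struct note_hdr));
    return 0;
}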
+ */ + if (cpu == crashing_cpu) + return 1; + local_irq_disable(); + + if (!user_mode(regs)) { + crash_setup_regs(&fixed_regs, regs); + regs = &fixed_regs; + } + crash_save_this_cpu(regs, cpu); + disable_local_APIC(); + atomic_dec(&waiting_for_crash_ipi); + /* Assume hlt works */ + __asm__("hlt"); + for(;;); + + return 1; +} + +/* + * By using the NMI code instead of a vector we just sneak thru the + * word generator coming out with just what we want. AND it does + * not matter if clustered_apic_mode is set or not. + */ +static void smp_send_nmi_allbutself(void) +{ + send_IPI_allbutself(APIC_DM_NMI); +} + +static void nmi_shootdown_cpus(void) +{ + unsigned long msecs; + + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + /* Would it be better to replace the trap vector here? */ + set_nmi_callback(crash_nmi_callback); + /* Ensure the new callback function is set before sending + * out the NMI + */ + wmb(); + + smp_send_nmi_allbutself(); + + msecs = 1000; /* Wait at most a second for the other cpus to stop */ + while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { + mdelay(1); + msecs--; + } + + /* Leave the nmi callback set */ + disable_local_APIC(); +} +#else +static void nmi_shootdown_cpus(void) +{ + /* There are no cpus to shootdown */ +} +#endif + +void machine_crash_shutdown(struct pt_regs *regs) +{ + /* This function is only called after the system + * has paniced or is otherwise in a critical state. + * The minimum amount of code to allow a kexec'd kernel + * to run successfully needs to happen here. + * + * In practice this means shooting down the other cpus in + * an SMP system. + */ + /* The kernel is broken so disable interrupts */ + local_irq_disable(); + + /* Make a note of crashing cpu. Will be used in NMI callback.*/ + crashing_cpu = smp_processor_id(); + nmi_shootdown_cpus(); + lapic_shutdown(); +#if defined(CONFIG_X86_IO_APIC) + disable_IO_APIC(); +#endif + crash_save_self(regs); +} diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index 6ed7e28f306c..a3cdf894302b 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -1,22 +1,15 @@ #include <linux/types.h> -#include <linux/kernel.h> #include <linux/string.h> #include <linux/init.h> #include <linux/module.h> -#include <linux/slab.h> -#include <linux/acpi.h> -#include <asm/io.h> -#include <linux/pm.h> -#include <asm/system.h> #include <linux/dmi.h> #include <linux/bootmem.h> -struct dmi_header -{ - u8 type; - u8 length; - u16 handle; +struct dmi_header { + u8 type; + u8 length; + u16 handle; }; #undef DMI_DEBUG @@ -29,15 +22,13 @@ struct dmi_header static char * __init dmi_string(struct dmi_header *dm, u8 s) { - u8 *bp=(u8 *)dm; - bp+=dm->length; - if(!s) + u8 *bp = ((u8 *) dm) + dm->length; + + if (!s) return ""; s--; - while(s>0 && *bp) - { - bp+=strlen(bp); - bp++; + while (s > 0 && *bp) { + bp += strlen(bp) + 1; s--; } return bp; @@ -47,16 +38,14 @@ static char * __init dmi_string(struct dmi_header *dm, u8 s) * We have to be cautious here. 
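nmi_shootdown_cpus() works by sending an NMI to every other processor and then polling an atomic counter for up to about a second while those CPUs save their registers and park themselves. Here is a hedged userspace model of that bounded-wait handshake, with POSIX threads standing in for CPUs and a C11 atomic for the kernel's atomic_t; all names and timings are illustrative only.

/* Toy model of the "IPI everyone, then wait with a timeout" pattern. */
#include <stdatomic.h>
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>

#define NCPUS 4

static atomic_int waiting;

static void *other_cpu(void *arg)
{
    (void)arg;
    usleep(1000);                  /* pretend to save registers, etc.  */
    atomic_fetch_sub(&waiting, 1); /* "ack" the shootdown              */
    for (;;)                       /* a real CPU would sit in hlt      */
        pause();
    return NULL;
}

int main(void)
{
    pthread_t tids[NCPUS - 1];
    int msecs = 1000;              /* wait at most ~1 second, like the patch */

    atomic_store(&waiting, NCPUS - 1);
    for (int i = 0; i < NCPUS - 1; i++)
        pthread_create(&tids[i], NULL, other_cpu, NULL);

    while (atomic_load(&waiting) > 0 && msecs) {
        usleep(1000);              /* mdelay(1) stand-in */
        msecs--;
    }

    printf("stragglers: %d (0 means everyone checked in)\n",
           atomic_load(&waiting));
    return 0;
}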
We have seen BIOSes with DMI pointers * pointing to completely the wrong place for example */ - -static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dmi_header *)) +static int __init dmi_table(u32 base, int len, int num, + void (*decode)(struct dmi_header *)) { - u8 *buf; - struct dmi_header *dm; - u8 *data; - int i=0; + u8 *buf, *data; + int i = 0; buf = bt_ioremap(base, len); - if(buf==NULL) + if (buf == NULL) return -1; data = buf; @@ -65,36 +54,34 @@ static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dm * Stop when we see all the items the table claimed to have * OR we run off the end of the table (also happens) */ - - while(i<num && data-buf+sizeof(struct dmi_header)<=len) - { - dm=(struct dmi_header *)data; + while ((i < num) && (data - buf + sizeof(struct dmi_header)) <= len) { + struct dmi_header *dm = (struct dmi_header *)data; /* * We want to know the total length (formated area and strings) * before decoding to make sure we won't run off the table in * dmi_decode or dmi_string */ - data+=dm->length; - while(data-buf<len-1 && (data[0] || data[1])) + data += dm->length; + while ((data - buf < len - 1) && (data[0] || data[1])) data++; - if(data-buf<len-1) + if (data - buf < len - 1) decode(dm); - data+=2; + data += 2; i++; } bt_iounmap(buf, len); return 0; } - -inline static int __init dmi_checksum(u8 *buf) +static int __init dmi_checksum(u8 *buf) { - u8 sum=0; + u8 sum = 0; int a; - for(a=0; a<15; a++) - sum+=buf[a]; - return (sum==0); + for (a = 0; a < 15; a++) + sum += buf[a]; + + return sum == 0; } static int __init dmi_iterate(void (*decode)(struct dmi_header *)) @@ -110,28 +97,30 @@ static int __init dmi_iterate(void (*decode)(struct dmi_header *)) p = ioremap(0xF0000, 0x10000); if (p == NULL) return -1; + for (q = p; q < p + 0x10000; q += 16) { memcpy_fromio(buf, q, 15); - if(memcmp(buf, "_DMI_", 5)==0 && dmi_checksum(buf)) - { - u16 num=buf[13]<<8|buf[12]; - u16 len=buf[7]<<8|buf[6]; - u32 base=buf[11]<<24|buf[10]<<16|buf[9]<<8|buf[8]; + if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) { + u16 num = (buf[13] << 8) | buf[12]; + u16 len = (buf[7] << 8) | buf[6]; + u32 base = (buf[11] << 24) | (buf[10] << 16) | + (buf[9] << 8) | buf[8]; /* * DMI version 0.0 means that the real version is taken from * the SMBIOS version, which we don't know at this point. */ - if(buf[14]!=0) + if (buf[14] != 0) printk(KERN_INFO "DMI %d.%d present.\n", - buf[14]>>4, buf[14]&0x0F); + buf[14] >> 4, buf[14] & 0xF); else printk(KERN_INFO "DMI present.\n"); + dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n", num, len)); - dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", - base)); - if(dmi_table(base,len, num, decode)==0) + dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", base)); + + if (dmi_table(base,len, num, decode) == 0) return 0; } } @@ -143,16 +132,17 @@ static char *dmi_ident[DMI_STRING_MAX]; /* * Save a DMI string */ - static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) { char *d = (char*)dm; char *p = dmi_string(dm, d[string]); - if(p==NULL || *p == 0) + + if (p == NULL || *p == 0) return; if (dmi_ident[slot]) return; - dmi_ident[slot] = alloc_bootmem(strlen(p)+1); + + dmi_ident[slot] = alloc_bootmem(strlen(p) + 1); if(dmi_ident[slot]) strcpy(dmi_ident[slot], p); else @@ -160,281 +150,47 @@ static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) } /* - * Ugly compatibility crap. 
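The rewritten scanner in dmi_scan.c leans on two SMBIOS conventions: each structure's strings sit after its formatted area as consecutive NUL-terminated strings (indexed from 1 and ended by an extra NUL), and the "_DMI_" entry point is validated by summing 15 bytes to zero. The standalone sketch below demonstrates both against a hand-made table; the sample bytes are invented for the example.

/* Standalone sketch of DMI string lookup and entry-point checksumming. */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct dmi_header {
    uint8_t  type;
    uint8_t  length;   /* length of the formatted area only */
    uint16_t handle;
};

/* Return string number s (1-based) from the string-set that follows dm. */
static const char *dmi_string(const struct dmi_header *dm, uint8_t s)
{
    const char *bp = (const char *)dm + dm->length;

    if (!s)
        return "";
    for (s--; s > 0 && *bp; s--)
        bp += strlen(bp) + 1;
    return bp;
}

static int dmi_checksum(const uint8_t *buf)
{
    uint8_t sum = 0;
    for (int i = 0; i < 15; i++)
        sum += buf[i];
    return sum == 0;
}

int main(void)
{
    /* Type 2 ("base board") structure: 8 formatted bytes, then two
     * strings, then the extra NUL that ends the string-set. */
    static const uint8_t blob[] = {
        0x02, 0x08, 0x00, 0x00,  0x01, 0x02, 0x00, 0x00,
        'A','c','m','e', 0,  'B','o','a','r','d','-','1', 0,  0
    };
    const struct dmi_header *dm = (const struct dmi_header *)blob;

    printf("vendor: %s\n", dmi_string(dm, 1));   /* "Acme"    */
    printf("board : %s\n", dmi_string(dm, 2));   /* "Board-1" */
    printf("fake entry point passes checksum? %d (expect 0)\n",
           dmi_checksum((const uint8_t *)"_DMI_\0\0\0\0\0\0\0\0\0\0"));
    return 0;
}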
- */ -#define dmi_blacklist dmi_system_id -#define NO_MATCH { DMI_NONE, NULL} -#define MATCH DMI_MATCH - -/* - * Toshiba keyboard likes to repeat keys when they are not repeated. - */ - -static __init int broken_toshiba_keyboard(struct dmi_blacklist *d) -{ - printk(KERN_WARNING "Toshiba with broken keyboard detected. If your keyboard sometimes generates 3 keypresses instead of one, see http://davyd.ucc.asn.au/projects/toshiba/README\n"); - return 0; -} - - -#ifdef CONFIG_ACPI_SLEEP -static __init int reset_videomode_after_s3(struct dmi_blacklist *d) -{ - /* See acpi_wakeup.S */ - extern long acpi_video_flags; - acpi_video_flags |= 2; - return 0; -} -#endif - - -#ifdef CONFIG_ACPI_BOOT -extern int acpi_force; - -static __init __attribute__((unused)) int dmi_disable_acpi(struct dmi_blacklist *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: acpi off\n",d->ident); - disable_acpi(); - } else { - printk(KERN_NOTICE - "Warning: DMI blacklist says broken, but acpi forced\n"); - } - return 0; -} - -/* - * Limit ACPI to CPU enumeration for HT - */ -static __init __attribute__((unused)) int force_acpi_ht(struct dmi_blacklist *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident); - disable_acpi(); - acpi_ht = 1; - } else { - printk(KERN_NOTICE - "Warning: acpi=force overrules DMI blacklist: acpi=ht\n"); - } - return 0; -} -#endif - -#ifdef CONFIG_ACPI_PCI -static __init int disable_acpi_irq(struct dmi_blacklist *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n", - d->ident); - acpi_noirq_set(); - } - return 0; -} -static __init int disable_acpi_pci(struct dmi_blacklist *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n", - d->ident); - acpi_disable_pci(); - } - return 0; -} -#endif - -/* - * Process the DMI blacklists - */ - - -/* - * This will be expanded over time to force things like the APM - * interrupt mask settings according to the laptop - */ - -static __initdata struct dmi_blacklist dmi_blacklist[]={ - - { broken_toshiba_keyboard, "Toshiba Satellite 4030cdt", { /* Keyboard generates spurious repeats */ - MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), - NO_MATCH, NO_MATCH, NO_MATCH - } }, -#ifdef CONFIG_ACPI_SLEEP - { reset_videomode_after_s3, "Toshiba Satellite 4030cdt", { /* Reset video mode after returning from ACPI S3 sleep */ - MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), - NO_MATCH, NO_MATCH, NO_MATCH - } }, -#endif - -#ifdef CONFIG_ACPI_BOOT - /* - * If your system is blacklisted here, but you find that acpi=force - * works for you, please contact acpi-devel@sourceforge.net - */ - - /* - * Boxes that need ACPI disabled - */ - - { dmi_disable_acpi, "IBM Thinkpad", { - MATCH(DMI_BOARD_VENDOR, "IBM"), - MATCH(DMI_BOARD_NAME, "2629H1G"), - NO_MATCH, NO_MATCH }}, - - /* - * Boxes that need acpi=ht - */ - - { force_acpi_ht, "FSC Primergy T850", { - MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), - MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "DELL GX240", { - MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"), - MATCH(DMI_BOARD_NAME, "OptiPlex GX240"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "HP VISUALIZE NT Workstation", { - MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), - MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "Compaq Workstation W8000", { - MATCH(DMI_SYS_VENDOR, "Compaq"), - MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), - NO_MATCH, NO_MATCH }}, - - { 
force_acpi_ht, "ASUS P4B266", { - MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - MATCH(DMI_BOARD_NAME, "P4B266"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "ASUS P2B-DS", { - MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - MATCH(DMI_BOARD_NAME, "P2B-DS"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "ASUS CUR-DLS", { - MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - MATCH(DMI_BOARD_NAME, "CUR-DLS"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "ABIT i440BX-W83977", { - MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"), - MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "IBM Bladecenter", { - MATCH(DMI_BOARD_VENDOR, "IBM"), - MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "IBM eServer xSeries 360", { - MATCH(DMI_BOARD_VENDOR, "IBM"), - MATCH(DMI_BOARD_NAME, "eServer xSeries 360"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "IBM eserver xSeries 330", { - MATCH(DMI_BOARD_VENDOR, "IBM"), - MATCH(DMI_BOARD_NAME, "eserver xSeries 330"), - NO_MATCH, NO_MATCH }}, - - { force_acpi_ht, "IBM eserver xSeries 440", { - MATCH(DMI_BOARD_VENDOR, "IBM"), - MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"), - NO_MATCH, NO_MATCH }}, - -#endif // CONFIG_ACPI_BOOT - -#ifdef CONFIG_ACPI_PCI - /* - * Boxes that need ACPI PCI IRQ routing disabled - */ - - { disable_acpi_irq, "ASUS A7V", { - MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"), - MATCH(DMI_BOARD_NAME, "<A7V>"), - /* newer BIOS, Revision 1011, does work */ - MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"), - NO_MATCH }}, - - /* - * Boxes that need ACPI PCI IRQ routing and PCI scan disabled - */ - { disable_acpi_pci, "ASUS PR-DLS", { /* _BBN 0 bug */ - MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - MATCH(DMI_BOARD_NAME, "PR-DLS"), - MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"), - MATCH(DMI_BIOS_DATE, "03/21/2003") }}, - - { disable_acpi_pci, "Acer TravelMate 36x Laptop", { - MATCH(DMI_SYS_VENDOR, "Acer"), - MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), - NO_MATCH, NO_MATCH - } }, - -#endif - - { NULL, } -}; - -/* * Process a DMI table entry. Right now all we care about are the BIOS * and machine entries. For 2.5 we should pull the smbus controller info * out of here. 
*/ - static void __init dmi_decode(struct dmi_header *dm) { -#ifdef DMI_DEBUG - u8 *data = (u8 *)dm; -#endif + u8 *data __attribute__((__unused__)) = (u8 *)dm; - switch(dm->type) - { - case 0: - dmi_printk(("BIOS Vendor: %s\n", - dmi_string(dm, data[4]))); - dmi_save_ident(dm, DMI_BIOS_VENDOR, 4); - dmi_printk(("BIOS Version: %s\n", - dmi_string(dm, data[5]))); - dmi_save_ident(dm, DMI_BIOS_VERSION, 5); - dmi_printk(("BIOS Release: %s\n", - dmi_string(dm, data[8]))); - dmi_save_ident(dm, DMI_BIOS_DATE, 8); - break; - case 1: - dmi_printk(("System Vendor: %s\n", - dmi_string(dm, data[4]))); - dmi_save_ident(dm, DMI_SYS_VENDOR, 4); - dmi_printk(("Product Name: %s\n", - dmi_string(dm, data[5]))); - dmi_save_ident(dm, DMI_PRODUCT_NAME, 5); - dmi_printk(("Version: %s\n", - dmi_string(dm, data[6]))); - dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); - dmi_printk(("Serial Number: %s\n", - dmi_string(dm, data[7]))); - break; - case 2: - dmi_printk(("Board Vendor: %s\n", - dmi_string(dm, data[4]))); - dmi_save_ident(dm, DMI_BOARD_VENDOR, 4); - dmi_printk(("Board Name: %s\n", - dmi_string(dm, data[5]))); - dmi_save_ident(dm, DMI_BOARD_NAME, 5); - dmi_printk(("Board Version: %s\n", - dmi_string(dm, data[6]))); - dmi_save_ident(dm, DMI_BOARD_VERSION, 6); - break; + switch(dm->type) { + case 0: + dmi_printk(("BIOS Vendor: %s\n", dmi_string(dm, data[4]))); + dmi_save_ident(dm, DMI_BIOS_VENDOR, 4); + dmi_printk(("BIOS Version: %s\n", dmi_string(dm, data[5]))); + dmi_save_ident(dm, DMI_BIOS_VERSION, 5); + dmi_printk(("BIOS Release: %s\n", dmi_string(dm, data[8]))); + dmi_save_ident(dm, DMI_BIOS_DATE, 8); + break; + case 1: + dmi_printk(("System Vendor: %s\n", dmi_string(dm, data[4]))); + dmi_save_ident(dm, DMI_SYS_VENDOR, 4); + dmi_printk(("Product Name: %s\n", dmi_string(dm, data[5]))); + dmi_save_ident(dm, DMI_PRODUCT_NAME, 5); + dmi_printk(("Version: %s\n", dmi_string(dm, data[6]))); + dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); + dmi_printk(("Serial Number: %s\n", dmi_string(dm, data[7]))); + dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7); + break; + case 2: + dmi_printk(("Board Vendor: %s\n", dmi_string(dm, data[4]))); + dmi_save_ident(dm, DMI_BOARD_VENDOR, 4); + dmi_printk(("Board Name: %s\n", dmi_string(dm, data[5]))); + dmi_save_ident(dm, DMI_BOARD_NAME, 5); + dmi_printk(("Board Version: %s\n", dmi_string(dm, data[6]))); + dmi_save_ident(dm, DMI_BOARD_VERSION, 6); + break; } } void __init dmi_scan_machine(void) { - int err = dmi_iterate(dmi_decode); - if(err == 0) - dmi_check_system(dmi_blacklist); - else + if (dmi_iterate(dmi_decode)) printk(KERN_INFO "DMI not present.\n"); } @@ -470,7 +226,6 @@ fail: d++; return count; } - EXPORT_SYMBOL(dmi_check_system); /** @@ -480,8 +235,8 @@ EXPORT_SYMBOL(dmi_check_system); * Returns one DMI data value, can be used to perform * complex DMI data checks. 
*/ -char * dmi_get_system_info(int field) +char *dmi_get_system_info(int field) { return dmi_ident[field]; } - +EXPORT_SYMBOL(dmi_get_system_info); diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index f732f427b418..385883ea8c19 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -30,6 +30,7 @@ #include <linux/ioport.h> #include <linux/module.h> #include <linux/efi.h> +#include <linux/kexec.h> #include <asm/setup.h> #include <asm/io.h> @@ -598,6 +599,9 @@ efi_initialize_iomem_resources(struct resource *code_resource, if (md->type == EFI_CONVENTIONAL_MEMORY) { request_resource(res, code_resource); request_resource(res, data_resource); +#ifdef CONFIG_KEXEC + request_resource(res, &crashk_res); +#endif } } } diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index e966fc8c44c4..4477bb107098 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -299,7 +299,6 @@ is386: movl $2,%ecx # set MP movl %eax,%cr0 call check_x87 - incb ready lgdt cpu_gdt_descr lidt idt_descr ljmp $(__KERNEL_CS),$1f @@ -316,8 +315,9 @@ is386: movl $2,%ecx # set MP lldt %ax cld # gcc2 wants the direction flag cleared at all times #ifdef CONFIG_SMP - movb ready, %cl - cmpb $1,%cl + movb ready, %cl + movb $1, ready + cmpb $0,%cl je 1f # the first CPU calls start_kernel # all other CPUs call initialize_secondary call initialize_secondary diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index 2c4813b47e57..178f4e9bac9d 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c @@ -268,10 +268,22 @@ static int i8259A_suspend(struct sys_device *dev, pm_message_t state) return 0; } +static int i8259A_shutdown(struct sys_device *dev) +{ + /* Put the i8259A into a quiescent state that + * the kernel initialization code can get it + * out of. + */ + outb(0xff, 0x21); /* mask all of 8259A-1 */ + outb(0xff, 0xA1); /* mask all of 8259A-1 */ + return 0; +} + static struct sysdev_class i8259_sysdev_class = { set_kset_name("i8259"), .suspend = i8259A_suspend, .resume = i8259A_resume, + .shutdown = i8259A_shutdown, }; static struct sys_device device_i8259A = { diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 2451a3a99440..35eb8e29c485 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -576,9 +576,11 @@ static int balanced_irq(void *unused) try_to_freeze(); if (time_after(jiffies, prev_balance_time+balanced_irq_interval)) { + preempt_disable(); do_irq_balance(); prev_balance_time = jiffies; time_remaining = balanced_irq_interval; + preempt_enable(); } } return 0; @@ -630,10 +632,8 @@ static int __init balanced_irq_init(void) printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); failed: for (i = 0; i < NR_CPUS; i++) { - if(irq_cpu_data[i].irq_delta) - kfree(irq_cpu_data[i].irq_delta); - if(irq_cpu_data[i].last_irq) - kfree(irq_cpu_data[i].last_irq); + kfree(irq_cpu_data[i].irq_delta); + kfree(irq_cpu_data[i].last_irq); } return 0; } @@ -1634,12 +1634,43 @@ static void __init enable_IO_APIC(void) */ void disable_IO_APIC(void) { + int pin; /* * Clear the IO-APIC before rebooting: */ clear_IO_APIC(); - disconnect_bsp_APIC(); + /* + * If the i82559 is routed through an IOAPIC + * Put that IOAPIC in virtual wire mode + * so legacy interrups can be delivered. 
+ */ + pin = find_isa_irq_pin(0, mp_ExtINT); + if (pin != -1) { + struct IO_APIC_route_entry entry; + unsigned long flags; + + memset(&entry, 0, sizeof(entry)); + entry.mask = 0; /* Enabled */ + entry.trigger = 0; /* Edge */ + entry.irr = 0; + entry.polarity = 0; /* High */ + entry.delivery_status = 0; + entry.dest_mode = 0; /* Physical */ + entry.delivery_mode = 7; /* ExtInt */ + entry.vector = 0; + entry.dest.physical.physical_dest = 0; + + + /* + * Add it to the IO-APIC irq-routing table: + */ + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + disconnect_bsp_APIC(pin != -1); } /* diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 73945a3c53c4..ce66dcc26d90 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -15,6 +15,9 @@ #include <linux/seq_file.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/delay.h> DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp; EXPORT_PER_CPU_SYMBOL(irq_stat); @@ -153,6 +156,11 @@ void irq_ctx_init(int cpu) cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); } +void irq_ctx_exit(int cpu) +{ + hardirq_ctx[cpu] = NULL; +} + extern asmlinkage void __do_softirq(void); asmlinkage void do_softirq(void) @@ -210,9 +218,8 @@ int show_interrupts(struct seq_file *p, void *v) if (i == 0) { seq_printf(p, " "); - for (j=0; j<NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "CPU%d ",j); + for_each_cpu(j) + seq_printf(p, "CPU%d ",j); seq_putc(p, '\n'); } @@ -225,9 +232,8 @@ int show_interrupts(struct seq_file *p, void *v) #ifndef CONFIG_SMP seq_printf(p, "%10u ", kstat_irqs(i)); #else - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + for_each_cpu(j) + seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif seq_printf(p, " %14s", irq_desc[i].handler->typename); seq_printf(p, " %s", action->name); @@ -240,16 +246,14 @@ skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", nmi_count(j)); + for_each_cpu(j) + seq_printf(p, "%10u ", nmi_count(j)); seq_putc(p, '\n'); #ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "LOC: "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).apic_timer_irqs); + for_each_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat,j).apic_timer_irqs); seq_putc(p, '\n'); #endif seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); @@ -259,3 +263,45 @@ skip: } return 0; } + +#ifdef CONFIG_HOTPLUG_CPU +#include <mach_apic.h> + +void fixup_irqs(cpumask_t map) +{ + unsigned int irq; + static int warned; + + for (irq = 0; irq < NR_IRQS; irq++) { + cpumask_t mask; + if (irq == 2) + continue; + + cpus_and(mask, irq_affinity[irq], map); + if (any_online_cpu(mask) == NR_CPUS) { + printk("Breaking affinity for irq %i\n", irq); + mask = map; + } + if (irq_desc[irq].handler->set_affinity) + irq_desc[irq].handler->set_affinity(irq, mask); + else if (irq_desc[irq].action && !(warned++)) + printk("Cannot set affinity for irq %i\n", irq); + } + +#if 0 + barrier(); + /* Ingo Molnar says: "after the IO-APIC masks have been redirected + [note the nop - the interrupt-enable boundary on x86 is two + instructions from sti] - to flush out pending hardirqs and + IPIs. 
After this point nothing is supposed to reach this CPU." */ + __asm__ __volatile__("sti; nop; cli"); + barrier(); +#else + /* That doesn't seem sufficient. Give it 1ms. */ + local_irq_enable(); + mdelay(1); + local_irq_disable(); +#endif +} +#endif + diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c new file mode 100644 index 000000000000..52ed18d8b511 --- /dev/null +++ b/arch/i386/kernel/machine_kexec.c @@ -0,0 +1,226 @@ +/* + * machine_kexec.c - handle transition of Linux booting another kernel + * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/mm.h> +#include <linux/kexec.h> +#include <linux/delay.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/apic.h> +#include <asm/cpufeature.h> + +static inline unsigned long read_cr3(void) +{ + unsigned long cr3; + asm volatile("movl %%cr3,%0": "=r"(cr3)); + return cr3; +} + +#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) + +#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +#define L2_ATTR (_PAGE_PRESENT) + +#define LEVEL0_SIZE (1UL << 12UL) + +#ifndef CONFIG_X86_PAE +#define LEVEL1_SIZE (1UL << 22UL) +static u32 pgtable_level1[1024] PAGE_ALIGNED; + +static void identity_map_page(unsigned long address) +{ + unsigned long level1_index, level2_index; + u32 *pgtable_level2; + + /* Find the current page table */ + pgtable_level2 = __va(read_cr3()); + + /* Find the indexes of the physical address to identity map */ + level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE; + level2_index = address / LEVEL1_SIZE; + + /* Identity map the page table entry */ + pgtable_level1[level1_index] = address | L0_ATTR; + pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; + + /* Flush the tlb so the new mapping takes effect. + * Global tlb entries are not flushed but that is not an issue. + */ + load_cr3(pgtable_level2); +} + +#else +#define LEVEL1_SIZE (1UL << 21UL) +#define LEVEL2_SIZE (1UL << 30UL) +static u64 pgtable_level1[512] PAGE_ALIGNED; +static u64 pgtable_level2[512] PAGE_ALIGNED; + +static void identity_map_page(unsigned long address) +{ + unsigned long level1_index, level2_index, level3_index; + u64 *pgtable_level3; + + /* Find the current page table */ + pgtable_level3 = __va(read_cr3()); + + /* Find the indexes of the physical address to identity map */ + level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE; + level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE; + level3_index = address / LEVEL2_SIZE; + + /* Identity map the page table entry */ + pgtable_level1[level1_index] = address | L0_ATTR; + pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; + set_64bit(&pgtable_level3[level3_index], + __pa(pgtable_level2) | L2_ATTR); + + /* Flush the tlb so the new mapping takes effect. + * Global tlb entries are not flushed but that is not an issue. 
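The new fixup_irqs() (used on CPU offline) re-targets every interrupt whose affinity mask no longer intersects the CPUs that remain online. A toy version of that decision, using plain bitmasks in place of cpumask_t, looks roughly like this; the IRQ numbers and masks are made up.

/* Sketch of the affinity-fixup idea: if an IRQ's allowed-CPU mask no
 * longer intersects the CPUs staying online, fall back to the full
 * online map (the equivalent of "Breaking affinity for irq N"). */
#include <stdio.h>
#include <stdint.h>

#define NR_IRQS 4

int main(void)
{
    uint32_t online_map = 0x0D;             /* CPUs 0, 2, 3 stay up     */
    uint32_t affinity[NR_IRQS] = {
        0x01,                               /* IRQ 0 -> CPU 0: fine     */
        0x02,                               /* IRQ 1 -> CPU 1: orphaned */
        0x06,                               /* IRQ 2 -> CPUs 1,2        */
        0x0F,                               /* IRQ 3 -> anyone          */
    };

    for (unsigned irq = 0; irq < NR_IRQS; irq++) {
        uint32_t mask = affinity[irq] & online_map;

        if (!mask) {                        /* nothing online left */
            printf("Breaking affinity for irq %u\n", irq);
            mask = online_map;
        }
        affinity[irq] = mask;               /* handler->set_affinity() here */
        printf("irq %u -> mask 0x%02x\n", irq, affinity[irq]);
    }
    return 0;
}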
+ */ + load_cr3(pgtable_level3); +} +#endif + + +static void set_idt(void *newidt, __u16 limit) +{ + unsigned char curidt[6]; + + /* ia32 supports unaliged loads & stores */ + (*(__u16 *)(curidt)) = limit; + (*(__u32 *)(curidt +2)) = (unsigned long)(newidt); + + __asm__ __volatile__ ( + "lidt %0\n" + : "=m" (curidt) + ); +}; + + +static void set_gdt(void *newgdt, __u16 limit) +{ + unsigned char curgdt[6]; + + /* ia32 supports unaligned loads & stores */ + (*(__u16 *)(curgdt)) = limit; + (*(__u32 *)(curgdt +2)) = (unsigned long)(newgdt); + + __asm__ __volatile__ ( + "lgdt %0\n" + : "=m" (curgdt) + ); +}; + +static void load_segments(void) +{ +#define __STR(X) #X +#define STR(X) __STR(X) + + __asm__ __volatile__ ( + "\tljmp $"STR(__KERNEL_CS)",$1f\n" + "\t1:\n" + "\tmovl $"STR(__KERNEL_DS)",%eax\n" + "\tmovl %eax,%ds\n" + "\tmovl %eax,%es\n" + "\tmovl %eax,%fs\n" + "\tmovl %eax,%gs\n" + "\tmovl %eax,%ss\n" + ); +#undef STR +#undef __STR +} + +typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)( + unsigned long indirection_page, + unsigned long reboot_code_buffer, + unsigned long start_address, + unsigned int has_pae) ATTRIB_NORET; + +const extern unsigned char relocate_new_kernel[]; +extern void relocate_new_kernel_end(void); +const extern unsigned int relocate_new_kernel_size; + +/* + * A architecture hook called to validate the + * proposed image and prepare the control pages + * as needed. The pages for KEXEC_CONTROL_CODE_SIZE + * have been allocated, but the segments have yet + * been copied into the kernel. + * + * Do what every setup is needed on image and the + * reboot code buffer to allow us to avoid allocations + * later. + * + * Currently nothing. + */ +int machine_kexec_prepare(struct kimage *image) +{ + return 0; +} + +/* + * Undo anything leftover by machine_kexec_prepare + * when an image is freed. + */ +void machine_kexec_cleanup(struct kimage *image) +{ +} + +/* + * Do not allocate memory (or fail in any way) in machine_kexec(). + * We are past the point of no return, committed to rebooting now. + */ +NORET_TYPE void machine_kexec(struct kimage *image) +{ + unsigned long page_list; + unsigned long reboot_code_buffer; + + relocate_new_kernel_t rnk; + + /* Interrupts aren't acceptable while we reboot */ + local_irq_disable(); + + /* Compute some offsets */ + reboot_code_buffer = page_to_pfn(image->control_code_page) + << PAGE_SHIFT; + page_list = image->head; + + /* Set up an identity mapping for the reboot_code_buffer */ + identity_map_page(reboot_code_buffer); + + /* copy it out */ + memcpy((void *)reboot_code_buffer, relocate_new_kernel, + relocate_new_kernel_size); + + /* The segment registers are funny things, they are + * automatically loaded from a table, in memory wherever you + * set them to a specific selector, but this table is never + * accessed again you set the segment to a different selector. + * + * The more common model is are caches where the behide + * the scenes work is done, but is also dropped at arbitrary + * times. + * + * I take advantage of this here by force loading the + * segments, before I zap the gdt with an invalid value. + */ + load_segments(); + /* The gdt & idt are now invalid. + * If you want to load them you must set up your own idt & gdt. 
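identity_map_page() only has to make one physical page reachable at the same virtual address, so the relocation stub keeps executing after the page tables are swapped. For the two-level (non-PAE) case the work is just index extraction; the small program below walks through that arithmetic for an arbitrary example address.

/* Standalone sketch of the index math in the non-PAE identity_map_page():
 * with 4 KiB pages a 32-bit address splits into a page-directory index
 * (top 10 bits), a page-table index (next 10 bits) and a 12-bit offset. */
#include <stdio.h>
#include <stdint.h>

#define LEVEL0_SIZE (1UL << 12)   /* 4 KiB page     */
#define LEVEL1_SIZE (1UL << 22)   /* 4 MiB per PDE  */

int main(void)
{
    uint32_t address = 0x1234A000;            /* page to identity map */

    uint32_t pte_index = (address % LEVEL1_SIZE) / LEVEL0_SIZE;
    uint32_t pde_index = address / LEVEL1_SIZE;

    /* An identity mapping stores the address itself (plus the
     * present/write/accessed/dirty attribute bits) in the PTE, so
     * virtual == physical for that one page. */
    printf("address    0x%08x\n", address);
    printf("pde index  %u (== address >> 22)\n", pde_index);
    printf("pte index  %u (== (address >> 12) & 0x3ff)\n", pte_index);
    printf("pte value  0x%08x | attrs\n", address & ~0xfffu);
    return 0;
}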
+ */ + set_gdt(phys_to_virt(0),0); + set_idt(phys_to_virt(0),0); + + /* now call it */ + rnk = (relocate_new_kernel_t) reboot_code_buffer; + (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae); +} diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 383a11600d2c..af917f609c7d 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -67,7 +67,6 @@ unsigned long mp_lapic_addr; /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; -unsigned int boot_cpu_logical_apicid = -1U; /* Internal processor count */ static unsigned int __initdata num_processors; @@ -180,7 +179,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m) if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { Dprintk(" Bootup CPU\n"); boot_cpu_physical_apicid = m->mpc_apicid; - boot_cpu_logical_apicid = apicid; } if (num_processors >= NR_CPUS) { diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index aea2ce1145df..5f8cfa6b7940 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -13,6 +13,7 @@ #include <stdarg.h> +#include <linux/cpu.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/fs.h> @@ -55,6 +56,9 @@ #include <linux/irq.h> #include <linux/err.h> +#include <asm/tlbflush.h> +#include <asm/cpu.h> + asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); static int hlt_counter; @@ -143,14 +147,42 @@ static void poll_idle (void) } } +#ifdef CONFIG_HOTPLUG_CPU +#include <asm/nmi.h> +/* We don't actually take CPU down, just spin without interrupts. */ +static inline void play_dead(void) +{ + /* This must be done before dead CPU ack */ + cpu_exit_clear(); + wbinvd(); + mb(); + /* Ack it */ + __get_cpu_var(cpu_state) = CPU_DEAD; + + /* + * With physical CPU hotplug, we should halt the cpu + */ + local_irq_disable(); + while (1) + __asm__ __volatile__("hlt":::"memory"); +} +#else +static inline void play_dead(void) +{ + BUG(); +} +#endif /* CONFIG_HOTPLUG_CPU */ + /* * The idle thread. 
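relocate_new_kernel is assembly, but the loop it implements is easier to see in C: it walks a list of tagged words whose low bits say whether an entry names the current destination page, points at the next list page, marks a source page to copy, or ends the list. The sketch below is a scaled-down, runnable rendering of that walk; the 16-byte "pages" and sample buffers are invented, and the real code of course works on physical 4 KiB pages with paging and caches to worry about.

/* C rendering of the indirection-page walk done in relocate_kernel.S. */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define IND_DESTINATION 0x1
#define IND_INDIRECTION 0x2
#define IND_DONE        0x4
#define IND_SOURCE      0x8
#define PAGE_SZ         16            /* 4096 in the real code */
#define ADDR_MASK       (~(uintptr_t)(PAGE_SZ - 1))

static void walk(const uintptr_t *entry)
{
    char *dest = NULL;

    for (;;) {
        uintptr_t e = *entry++;

        if (e & IND_DESTINATION) {
            dest = (char *)(e & ADDR_MASK);
        } else if (e & IND_INDIRECTION) {
            entry = (const uintptr_t *)(e & ADDR_MASK);
        } else if (e & IND_DONE) {
            return;
        } else if (e & IND_SOURCE) {
            memcpy(dest, (const void *)(e & ADDR_MASK), PAGE_SZ);
            dest += PAGE_SZ;          /* rep movsl leaves %edi advanced */
        }
    }
}

int main(void)
{
    /* Two aligned "pages" of payload and one destination area. */
    static char src[2][PAGE_SZ] __attribute__((aligned(PAGE_SZ))) =
        { "first page.....", "second page...." };
    static char dst[2][PAGE_SZ] __attribute__((aligned(PAGE_SZ)));
    uintptr_t list[] = {
        (uintptr_t)dst[0] | IND_DESTINATION,
        (uintptr_t)src[0] | IND_SOURCE,
        (uintptr_t)src[1] | IND_SOURCE,
        IND_DONE,
    };

    walk(list);
    printf("%.15s / %.15s\n", dst[0], dst[1]);
    return 0;
}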
There's no useful work to be * done, so just try to conserve power and have a * low exit latency (ie sit in a loop waiting for * somebody to say that they'd like to reschedule) */ -void cpu_idle (void) +void cpu_idle(void) { + int cpu = raw_smp_processor_id(); + /* endless idle loop with no priority at all */ while (1) { while (!need_resched()) { @@ -165,6 +197,9 @@ void cpu_idle (void) if (!idle) idle = default_idle; + if (cpu_is_offline(cpu)) + play_dead(); + __get_cpu_var(irq_stat).idle_timestamp = jiffies; idle(); } @@ -223,7 +258,7 @@ static void mwait_idle(void) } } -void __init select_idle_routine(const struct cpuinfo_x86 *c) +void __devinit select_idle_routine(const struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_MWAIT)) { printk("monitor/mwait feature present.\n"); diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index db912209a8d3..b3e584849961 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -26,7 +26,6 @@ static int reboot_mode; static int reboot_thru_bios; #ifdef CONFIG_SMP -int reboot_smp = 0; static int reboot_cpu = -1; /* shamelessly grabbed from lib/vsprintf.c for readability */ #define is_digit(c) ((c) >= '0' && (c) <= '9') @@ -49,7 +48,6 @@ static int __init reboot_setup(char *str) break; #ifdef CONFIG_SMP case 's': /* "smp" reboot by executing reset on BSP or other CPU*/ - reboot_smp = 1; if (is_digit(*(str+1))) { reboot_cpu = (int) (*(str+1) - '0'); if (is_digit(*(str+2))) @@ -88,33 +86,9 @@ static int __init set_bios_reboot(struct dmi_system_id *d) return 0; } -/* - * Some machines require the "reboot=s" commandline option, this quirk makes that automatic. - */ -static int __init set_smp_reboot(struct dmi_system_id *d) -{ -#ifdef CONFIG_SMP - if (!reboot_smp) { - reboot_smp = 1; - printk(KERN_INFO "%s series board detected. Selecting SMP-method for reboots.\n", d->ident); - } -#endif - return 0; -} - -/* - * Some machines require the "reboot=b,s" commandline option, this quirk makes that automatic. - */ -static int __init set_smp_bios_reboot(struct dmi_system_id *d) -{ - set_smp_reboot(d); - set_bios_reboot(d); - return 0; -} - static struct dmi_system_id __initdata reboot_dmi_table[] = { { /* Handle problems with rebooting on Dell 1300's */ - .callback = set_smp_bios_reboot, + .callback = set_bios_reboot, .ident = "Dell PowerEdge 1300", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), @@ -301,41 +275,32 @@ void machine_real_restart(unsigned char *code, int length) EXPORT_SYMBOL(machine_real_restart); #endif -void machine_restart(char * __unused) +void machine_shutdown(void) { #ifdef CONFIG_SMP - int cpuid; - - cpuid = GET_APIC_ID(apic_read(APIC_ID)); - - if (reboot_smp) { - - /* check to see if reboot_cpu is valid - if its not, default to the BSP */ - if ((reboot_cpu == -1) || - (reboot_cpu > (NR_CPUS -1)) || - !physid_isset(cpuid, phys_cpu_present_map)) - reboot_cpu = boot_cpu_physical_apicid; - - reboot_smp = 0; /* use this as a flag to only go through this once*/ - /* re-run this function on the other CPUs - it will fall though this section since we have - cleared reboot_smp, and do the reboot if it is the - correct CPU, otherwise it halts. 
*/ - if (reboot_cpu != cpuid) - smp_call_function((void *)machine_restart , NULL, 1, 0); + int reboot_cpu_id; + + /* The boot cpu is always logical cpu 0 */ + reboot_cpu_id = 0; + + /* See if there has been given a command line override */ + if ((reboot_cpu_id != -1) && (reboot_cpu < NR_CPUS) && + cpu_isset(reboot_cpu, cpu_online_map)) { + reboot_cpu_id = reboot_cpu; } - /* if reboot_cpu is still -1, then we want a tradional reboot, - and if we are not running on the reboot_cpu,, halt */ - if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) { - for (;;) - __asm__ __volatile__ ("hlt"); + /* Make certain the cpu I'm rebooting on is online */ + if (!cpu_isset(reboot_cpu_id, cpu_online_map)) { + reboot_cpu_id = smp_processor_id(); } - /* - * Stop all CPUs and turn off local APICs and the IO-APIC, so - * other OSs see a clean IRQ state. + + /* Make certain I only run on the appropriate processor */ + set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id)); + + /* O.K. Now that I'm on the appropriate processor, stop + * all of the others, and disable their local APICs. */ + smp_send_stop(); #endif /* CONFIG_SMP */ @@ -344,6 +309,11 @@ void machine_restart(char * __unused) #ifdef CONFIG_X86_IO_APIC disable_IO_APIC(); #endif +} + +void machine_restart(char * __unused) +{ + machine_shutdown(); if (!reboot_thru_bios) { if (efi_enabled) { diff --git a/arch/i386/kernel/relocate_kernel.S b/arch/i386/kernel/relocate_kernel.S new file mode 100644 index 000000000000..d312616effa1 --- /dev/null +++ b/arch/i386/kernel/relocate_kernel.S @@ -0,0 +1,120 @@ +/* + * relocate_kernel.S - put the kernel image in place to boot + * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/linkage.h> + + /* + * Must be relocatable PIC code callable as a C function, that once + * it starts can not use the previous processes stack. + */ + .globl relocate_new_kernel +relocate_new_kernel: + /* read the arguments and say goodbye to the stack */ + movl 4(%esp), %ebx /* page_list */ + movl 8(%esp), %ebp /* reboot_code_buffer */ + movl 12(%esp), %edx /* start address */ + movl 16(%esp), %ecx /* cpu_has_pae */ + + /* zero out flags, and disable interrupts */ + pushl $0 + popfl + + /* set a new stack at the bottom of our page... */ + lea 4096(%ebp), %esp + + /* store the parameters back on the stack */ + pushl %edx /* store the start address */ + + /* Set cr0 to a known state: + * 31 0 == Paging disabled + * 18 0 == Alignment check disabled + * 16 0 == Write protect disabled + * 3 0 == No task switch + * 2 0 == Don't do FP software emulation. + * 0 1 == Proctected mode enabled + */ + movl %cr0, %eax + andl $~((1<<31)|(1<<18)|(1<<16)|(1<<3)|(1<<2)), %eax + orl $(1<<0), %eax + movl %eax, %cr0 + + /* clear cr4 if applicable */ + testl %ecx, %ecx + jz 1f + /* Set cr4 to a known state: + * Setting everything to zero seems safe. + */ + movl %cr4, %eax + andl $0, %eax + movl %eax, %cr4 + + jmp 1f +1: + + /* Flush the TLB (needed?) 
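The reworked machine_shutdown() picks a CPU to finish the reboot on: logical CPU 0 by default, the reboot= override only if it names an online CPU, and the currently running CPU as a last resort, after which it migrates itself there and stops the others. The helper below models just that selection step, with an invented bitmask standing in for cpu_online_map.

/* Sketch of the reboot-CPU selection; masks and override are made up. */
#include <stdio.h>
#include <stdint.h>

static int pick_reboot_cpu(uint32_t online_map, int override, int current_cpu)
{
    int cpu = 0;                                  /* boot CPU is logical 0 */

    if (override >= 0 && override < 32 && (online_map & (1u << override)))
        cpu = override;
    if (!(online_map & (1u << cpu)))              /* paranoia: must be online */
        cpu = current_cpu;
    return cpu;
}

int main(void)
{
    printf("%d\n", pick_reboot_cpu(0x0E /* CPUs 1-3 online */, -1, 2)); /* 2 */
    printf("%d\n", pick_reboot_cpu(0x0F, 3, 1));                        /* 3 */
    return 0;
}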
*/ + xorl %eax, %eax + movl %eax, %cr3 + + /* Do the copies */ + movl %ebx, %ecx + jmp 1f + +0: /* top, read another word from the indirection page */ + movl (%ebx), %ecx + addl $4, %ebx +1: + testl $0x1, %ecx /* is it a destination page */ + jz 2f + movl %ecx, %edi + andl $0xfffff000, %edi + jmp 0b +2: + testl $0x2, %ecx /* is it an indirection page */ + jz 2f + movl %ecx, %ebx + andl $0xfffff000, %ebx + jmp 0b +2: + testl $0x4, %ecx /* is it the done indicator */ + jz 2f + jmp 3f +2: + testl $0x8, %ecx /* is it the source indicator */ + jz 0b /* Ignore it otherwise */ + movl %ecx, %esi /* For every source page do a copy */ + andl $0xfffff000, %esi + + movl $1024, %ecx + rep ; movsl + jmp 0b + +3: + + /* To be certain of avoiding problems with self-modifying code + * I need to execute a serializing instruction here. + * So I flush the TLB, it's handy, and not processor dependent. + */ + xorl %eax, %eax + movl %eax, %cr3 + + /* set all of the registers to known values */ + /* leave %esp alone */ + + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %esi, %esi + xorl %edi, %edi + xorl %ebp, %ebp + ret +relocate_new_kernel_end: + + .globl relocate_new_kernel_size +relocate_new_kernel_size: + .long relocate_new_kernel_end - relocate_new_kernel diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 30406fd0b64c..7306353c520e 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -43,7 +43,12 @@ #include <linux/init.h> #include <linux/edd.h> #include <linux/nodemask.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> + #include <video/edid.h> + +#include <asm/apic.h> #include <asm/e820.h> #include <asm/mpspec.h> #include <asm/setup.h> @@ -55,12 +60,15 @@ #include "setup_arch_pre.h" #include <bios_ebda.h> +/* Forward Declaration. */ +void __init find_max_pfn(void); + /* This value is set up by the early boot code to point to the value immediately after the boot time page tables. It contains a *physical* address, and must not be in the .bss segment! */ unsigned long init_pg_tables_end __initdata = ~0UL; -int disable_pse __initdata = 0; +int disable_pse __devinitdata = 0; /* * Machine setup.. @@ -732,6 +740,15 @@ static void __init parse_cmdline_early (char ** cmdline_p) if (to != command_line) to--; if (!memcmp(from+7, "exactmap", 8)) { +#ifdef CONFIG_CRASH_DUMP + /* If we are doing a crash dump, we + * still need to know the real mem + * size before original memory map is + * reset. + */ + find_max_pfn(); + saved_max_pfn = max_pfn; +#endif from += 8+7; e820.nr_map = 0; userdef = 1; @@ -835,6 +852,44 @@ static void __init parse_cmdline_early (char ** cmdline_p) #endif /* CONFIG_X86_LOCAL_APIC */ #endif /* CONFIG_ACPI_BOOT */ +#ifdef CONFIG_X86_LOCAL_APIC + /* enable local APIC */ + else if (!memcmp(from, "lapic", 5)) + lapic_enable(); + + /* disable local APIC */ + else if (!memcmp(from, "nolapic", 6)) + lapic_disable(); +#endif /* CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_KEXEC + /* crashkernel=size@addr specifies the location to reserve for + * a crash kernel. By reserving this memory we guarantee + * that linux never set's it up as a DMA target. + * Useful for holding code to do something appropriate + * after a kernel panic. + */ + else if (!memcmp(from, "crashkernel=", 12)) { + unsigned long size, base; + size = memparse(from+12, &from); + if (*from == '@') { + base = memparse(from+1, &from); + /* FIXME: Do I want a sanity check + * to validate the memory range? 
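For readers following the relocate_new_kernel copy loop above, here is a C model of the indirection-page walk it performs (an illustrative sketch only, not part of the patch; it treats the tagged physical addresses as ordinary pointers so it can be compiled stand-alone):

#include <stdint.h>
#include <string.h>

#define IND_DESTINATION 0x1   /* low-bit tag: set the current destination page */
#define IND_INDIRECTION 0x2   /* continue reading entries from this page */
#define IND_DONE        0x4   /* end of the list */
#define IND_SOURCE      0x8   /* copy this source page to the destination */
#define PAGE_ADDR_MASK  (~(uintptr_t)0xfff)

static void walk_kimage_list(const uintptr_t *entry)
{
    unsigned char *dest = NULL;

    for (;;) {
        uintptr_t e = *entry++;

        if (e & IND_DESTINATION) {
            dest = (unsigned char *)(e & PAGE_ADDR_MASK);
        } else if (e & IND_INDIRECTION) {
            entry = (const uintptr_t *)(e & PAGE_ADDR_MASK);
        } else if (e & IND_DONE) {
            return;
        } else if (e & IND_SOURCE) {
            memcpy(dest, (const void *)(e & PAGE_ADDR_MASK), 4096);
            dest += 4096;     /* rep movsl leaves %edi just past the page */
        }
        /* entries with no recognised tag are skipped, as in the assembly */
    }
}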
+ */ + crashk_res.start = base; + crashk_res.end = base + size - 1; + } + } +#endif +#ifdef CONFIG_CRASH_DUMP + /* elfcorehdr= specifies the location of elf core header + * stored by the crashed kernel. + */ + else if (!memcmp(from, "elfcorehdr=", 11)) + elfcorehdr_addr = memparse(from+11, &from); +#endif + /* * highmem=size forces highmem to be exactly 'size' bytes. * This works even on boxes that have no highmem otherwise. @@ -1113,8 +1168,8 @@ void __init setup_bootmem_allocator(void) * the (very unlikely) case of us accidentally initializing the * bootmem allocator with an invalid RAM area. */ - reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) + - bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY)); + reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) + + bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START)); /* * reserve physical page 0 - it's a special BIOS page on many boxes, @@ -1170,6 +1225,11 @@ void __init setup_bootmem_allocator(void) } } #endif +#ifdef CONFIG_KEXEC + if (crashk_res.start != crashk_res.end) + reserve_bootmem(crashk_res.start, + crashk_res.end - crashk_res.start + 1); +#endif } /* @@ -1223,6 +1283,9 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat */ request_resource(res, code_resource); request_resource(res, data_resource); +#ifdef CONFIG_KEXEC + request_resource(res, &crashk_res); +#endif } } } diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 68be7d0c7238..cec4bde67161 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -19,6 +19,7 @@ #include <linux/mc146818rtc.h> #include <linux/cache.h> #include <linux/interrupt.h> +#include <linux/cpu.h> #include <linux/module.h> #include <asm/mtrr.h> @@ -164,7 +165,7 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) unsigned long flags; local_irq_save(flags); - + WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); /* * Wait for idle. */ @@ -346,21 +347,21 @@ out: static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, unsigned long va) { - cpumask_t tmp; /* * A couple of (to be removed) sanity checks: * - * - we do not send IPIs to not-yet booted CPUs. * - current CPU must not be in mask * - mask must exist :) */ BUG_ON(cpus_empty(cpumask)); - - cpus_and(tmp, cpumask, cpu_online_map); - BUG_ON(!cpus_equal(cpumask, tmp)); BUG_ON(cpu_isset(smp_processor_id(), cpumask)); BUG_ON(!mm); + /* If a CPU which we ran on has gone down, OK. */ + cpus_and(cpumask, cpumask, cpu_online_map); + if (cpus_empty(cpumask)) + return; + /* * i'm not happy about this global shared spinlock in the * MM hot path, but we'll see how contended it is. @@ -476,6 +477,7 @@ void flush_tlb_all(void) */ void smp_send_reschedule(int cpu) { + WARN_ON(cpu_is_offline(cpu)); send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); } @@ -493,6 +495,16 @@ struct call_data_struct { int wait; }; +void lock_ipi_call_lock(void) +{ + spin_lock_irq(&call_lock); +} + +void unlock_ipi_call_lock(void) +{ + spin_unlock_irq(&call_lock); +} + static struct call_data_struct * call_data; /* @@ -516,10 +528,15 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, */ { struct call_data_struct data; - int cpus = num_online_cpus()-1; + int cpus; - if (!cpus) + /* Holding any lock stops cpus from going down. 
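As a side note on the crashkernel= handling above, the following stand-alone sketch shows how an argument like "crashkernel=64M@16M" splits into a size and a base address; memparse_model() is a simplified, illustrative stand-in for the kernel's memparse():

#include <stdio.h>
#include <stdlib.h>

static unsigned long long memparse_model(const char *s, const char **retptr)
{
    char *end;
    unsigned long long val = strtoull(s, &end, 0);

    switch (*end) {                  /* optional size suffix */
    case 'G': case 'g': val <<= 10;  /* fall through */
    case 'M': case 'm': val <<= 10;  /* fall through */
    case 'K': case 'k': val <<= 10; end++;
    }
    *retptr = end;
    return val;
}

int main(void)
{
    const char *arg = "crashkernel=64M@16M";
    const char *p = arg + sizeof("crashkernel=") - 1;
    unsigned long long size, base = 0;

    size = memparse_model(p, &p);
    if (*p == '@')
        base = memparse_model(p + 1, &p);

    printf("reserve %llu bytes at 0x%llx\n", size, base);
    return 0;
}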
*/ + spin_lock(&call_lock); + cpus = num_online_cpus() - 1; + if (!cpus) { + spin_unlock(&call_lock); return 0; + } /* Can deadlock when called with interrupts disabled */ WARN_ON(irqs_disabled()); @@ -531,7 +548,6 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, if (wait) atomic_set(&data.finished, 0); - spin_lock(&call_lock); call_data = &data; mb(); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index c20d96d5c15c..d66bf489a2e9 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -44,6 +44,9 @@ #include <linux/smp_lock.h> #include <linux/irq.h> #include <linux/bootmem.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/percpu.h> #include <linux/delay.h> #include <linux/mc146818rtc.h> @@ -56,18 +59,28 @@ #include <smpboot_hooks.h> /* Set if we find a B stepping CPU */ -static int __initdata smp_b_stepping; +static int __devinitdata smp_b_stepping; /* Number of siblings per CPU package */ int smp_num_siblings = 1; #ifdef CONFIG_X86_HT EXPORT_SYMBOL(smp_num_siblings); #endif -int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */ + +/* Package ID of each logical CPU */ +int phys_proc_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID}; EXPORT_SYMBOL(phys_proc_id); -int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */ + +/* Core ID of each logical CPU */ +int cpu_core_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID}; EXPORT_SYMBOL(cpu_core_id); +cpumask_t cpu_sibling_map[NR_CPUS]; +EXPORT_SYMBOL(cpu_sibling_map); + +cpumask_t cpu_core_map[NR_CPUS]; +EXPORT_SYMBOL(cpu_core_map); + /* bitmap of online cpus */ cpumask_t cpu_online_map; EXPORT_SYMBOL(cpu_online_map); @@ -77,6 +90,12 @@ cpumask_t cpu_callout_map; EXPORT_SYMBOL(cpu_callout_map); static cpumask_t smp_commenced_mask; +/* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there + * is no way to resync one AP against BP. TBD: for prescott and above, we + * should use IA64's algorithm + */ +static int __devinitdata tsc_sync_disabled; + /* Per CPU bogomips and other parameters */ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_data); @@ -96,13 +115,16 @@ static int trampoline_exec; static void map_cpu_to_logical_apicid(void); +/* State of each CPU. */ +DEFINE_PER_CPU(int, cpu_state) = { 0 }; + /* * Currently trivial. Write the real->protected mode * bootstrap into the page concerned. The caller * has made sure it's suitably aligned. 
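The smp_call_function() change above moves the online-CPU count under call_lock so that a CPU cannot be unplugged between counting the recipients and sending the IPIs. A minimal user-space model of that ordering follows (illustrative names; a pthread mutex stands in for the spinlock):

#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t call_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t cpu_online_mask = 0xf;        /* CPUs 0-3 online */

static void smp_send_ipi_model(uint64_t targets) { (void)targets; }

static int smp_call_function_model(int self)
{
    pthread_mutex_lock(&call_lock);           /* holding this blocks hotplug */

    uint64_t targets = cpu_online_mask & ~(1ull << self);
    if (!targets) {                           /* nobody else to call */
        pthread_mutex_unlock(&call_lock);
        return 0;
    }

    smp_send_ipi_model(targets);              /* the set can no longer change */
    /* ... wait for acknowledgements ... */

    pthread_mutex_unlock(&call_lock);
    return 0;
}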
*/ -static unsigned long __init setup_trampoline(void) +static unsigned long __devinit setup_trampoline(void) { memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); return virt_to_phys(trampoline_base); @@ -132,7 +154,7 @@ void __init smp_alloc_memory(void) * a given CPU */ -static void __init smp_store_cpu_info(int id) +static void __devinit smp_store_cpu_info(int id) { struct cpuinfo_x86 *c = cpu_data + id; @@ -326,7 +348,7 @@ extern void calibrate_delay(void); static atomic_t init_deasserted; -static void __init smp_callin(void) +static void __devinit smp_callin(void) { int cpuid, phys_id; unsigned long timeout; @@ -411,16 +433,48 @@ static void __init smp_callin(void) /* * Synchronize the TSC with the BP */ - if (cpu_has_tsc && cpu_khz) + if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled) synchronize_tsc_ap(); } static int cpucount; +static inline void +set_cpu_sibling_map(int cpu) +{ + int i; + + if (smp_num_siblings > 1) { + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_isset(i, cpu_callout_map)) + continue; + if (cpu_core_id[cpu] == cpu_core_id[i]) { + cpu_set(i, cpu_sibling_map[cpu]); + cpu_set(cpu, cpu_sibling_map[i]); + } + } + } else { + cpu_set(cpu, cpu_sibling_map[cpu]); + } + + if (current_cpu_data.x86_num_cores > 1) { + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_isset(i, cpu_callout_map)) + continue; + if (phys_proc_id[cpu] == phys_proc_id[i]) { + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); + } + } + } else { + cpu_core_map[cpu] = cpu_sibling_map[cpu]; + } +} + /* * Activate a secondary processor. */ -static void __init start_secondary(void *unused) +static void __devinit start_secondary(void *unused) { /* * Dont put anything before smp_callin(), SMP @@ -443,7 +497,23 @@ static void __init start_secondary(void *unused) * the local TLBs too. */ local_flush_tlb(); + + /* This must be done before setting cpu_online_map */ + set_cpu_sibling_map(raw_smp_processor_id()); + wmb(); + + /* + * We need to hold call_lock, so there is no inconsistency + * between the time smp_call_function() determines number of + * IPI receipients, and the time when the determination is made + * for which cpus receive the IPI. Holding this + * lock helps us to not include this cpu in a currently in progress + * smp_call_function(). + */ + lock_ipi_call_lock(); cpu_set(smp_processor_id(), cpu_online_map); + unlock_ipi_call_lock(); + per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; /* We can take interrupts now: we're officially "up". */ local_irq_enable(); @@ -458,7 +528,7 @@ static void __init start_secondary(void *unused) * from the task structure * This function must not return. */ -void __init initialize_secondary(void) +void __devinit initialize_secondary(void) { /* * We don't actually need to load the full TSS, @@ -572,7 +642,7 @@ static inline void __inquire_remote_apic(int apicid) * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this * won't ... remember to clear down the APIC, etc later. 
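The new set_cpu_sibling_map() above derives two relationships from per-CPU identifiers: CPUs sharing a core id become hyper-threading siblings, and CPUs sharing a package id become core-mates. A bitmask sketch of the same computation, with made-up topology arrays:

#include <stdint.h>

#define NCPUS 8

static int core_id[NCPUS]    = { 0, 0, 1, 1, 2, 2, 3, 3 };   /* illustrative */
static int package_id[NCPUS] = { 0, 0, 0, 0, 1, 1, 1, 1 };
static uint64_t sibling_map[NCPUS], core_map[NCPUS];

static void build_maps(uint64_t booted)       /* bit i set if CPU i came up */
{
    for (int cpu = 0; cpu < NCPUS; cpu++) {
        if (!(booted & (1ull << cpu)))
            continue;
        for (int i = 0; i < NCPUS; i++) {
            if (!(booted & (1ull << i)))
                continue;
            if (core_id[cpu] == core_id[i]) {          /* HT siblings */
                sibling_map[cpu] |= 1ull << i;
                sibling_map[i]   |= 1ull << cpu;
            }
            if (package_id[cpu] == package_id[i]) {    /* same package */
                core_map[cpu] |= 1ull << i;
                core_map[i]   |= 1ull << cpu;
            }
        }
    }
}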
*/ -static int __init +static int __devinit wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) { unsigned long send_status = 0, accept_status = 0; @@ -618,7 +688,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) #endif /* WAKE_SECONDARY_VIA_NMI */ #ifdef WAKE_SECONDARY_VIA_INIT -static int __init +static int __devinit wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) { unsigned long send_status = 0, accept_status = 0; @@ -753,8 +823,43 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) #endif /* WAKE_SECONDARY_VIA_INIT */ extern cpumask_t cpu_initialized; +static inline int alloc_cpu_id(void) +{ + cpumask_t tmp_map; + int cpu; + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); + if (cpu >= NR_CPUS) + return -ENODEV; + return cpu; +} + +#ifdef CONFIG_HOTPLUG_CPU +static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS]; +static inline struct task_struct * alloc_idle_task(int cpu) +{ + struct task_struct *idle; + + if ((idle = cpu_idle_tasks[cpu]) != NULL) { + /* initialize thread_struct. we really want to avoid destroy + * idle tread + */ + idle->thread.esp = (unsigned long)(((struct pt_regs *) + (THREAD_SIZE + (unsigned long) idle->thread_info)) - 1); + init_idle(idle, cpu); + return idle; + } + idle = fork_idle(cpu); + + if (!IS_ERR(idle)) + cpu_idle_tasks[cpu] = idle; + return idle; +} +#else +#define alloc_idle_task(cpu) fork_idle(cpu) +#endif -static int __init do_boot_cpu(int apicid) +static int __devinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. @@ -763,16 +868,17 @@ static int __init do_boot_cpu(int apicid) { struct task_struct *idle; unsigned long boot_error; - int timeout, cpu; + int timeout; unsigned long start_eip; unsigned short nmi_high = 0, nmi_low = 0; - cpu = ++cpucount; + ++cpucount; + /* * We can't use kernel_thread since we must avoid to * reschedule the child. */ - idle = fork_idle(cpu); + idle = alloc_idle_task(cpu); if (IS_ERR(idle)) panic("failed fork for CPU %d", cpu); idle->thread.eip = (unsigned long) start_secondary; @@ -839,13 +945,16 @@ static int __init do_boot_cpu(int apicid) inquire_remote_apic(apicid); } } - x86_cpu_to_apicid[cpu] = apicid; + if (boot_error) { /* Try to put things back the way they were before ... 
*/ unmap_cpu_to_logical_apicid(cpu); cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ cpucount--; + } else { + x86_cpu_to_apicid[cpu] = apicid; + cpu_set(cpu, cpu_present_map); } /* mark "stuck" area as not stuck */ @@ -854,6 +963,75 @@ static int __init do_boot_cpu(int apicid) return boot_error; } +#ifdef CONFIG_HOTPLUG_CPU +void cpu_exit_clear(void) +{ + int cpu = raw_smp_processor_id(); + + idle_task_exit(); + + cpucount --; + cpu_uninit(); + irq_ctx_exit(cpu); + + cpu_clear(cpu, cpu_callout_map); + cpu_clear(cpu, cpu_callin_map); + cpu_clear(cpu, cpu_present_map); + + cpu_clear(cpu, smp_commenced_mask); + unmap_cpu_to_logical_apicid(cpu); +} + +struct warm_boot_cpu_info { + struct completion *complete; + int apicid; + int cpu; +}; + +static void __devinit do_warm_boot_cpu(void *p) +{ + struct warm_boot_cpu_info *info = p; + do_boot_cpu(info->apicid, info->cpu); + complete(info->complete); +} + +int __devinit smp_prepare_cpu(int cpu) +{ + DECLARE_COMPLETION(done); + struct warm_boot_cpu_info info; + struct work_struct task; + int apicid, ret; + + lock_cpu_hotplug(); + apicid = x86_cpu_to_apicid[cpu]; + if (apicid == BAD_APICID) { + ret = -ENODEV; + goto exit; + } + + info.complete = &done; + info.apicid = apicid; + info.cpu = cpu; + INIT_WORK(&task, do_warm_boot_cpu, &info); + + tsc_sync_disabled = 1; + + /* init low mem mapping */ + memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS); + flush_tlb_all(); + schedule_work(&task); + wait_for_completion(&done); + + tsc_sync_disabled = 0; + zap_low_mappings(); + ret = 0; +exit: + unlock_cpu_hotplug(); + return ret; +} +#endif + static void smp_tune_scheduling (void) { unsigned long cachesize; /* kB */ @@ -895,13 +1073,6 @@ void *xquad_portio; EXPORT_SYMBOL(xquad_portio); #endif -cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; -#ifdef CONFIG_X86_HT -EXPORT_SYMBOL(cpu_sibling_map); -#endif -cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_core_map); - static void __init smp_boot_cpus(unsigned int max_cpus) { int apicid, cpu, bit, kicked; @@ -1013,7 +1184,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) if (max_cpus <= cpucount+1) continue; - if (do_boot_cpu(apicid)) + if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu)) printk("CPU #%d not responding - cannot use it.\n", apicid); else @@ -1065,44 +1236,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus) cpus_clear(cpu_core_map[cpu]); } - for (cpu = 0; cpu < NR_CPUS; cpu++) { - struct cpuinfo_x86 *c = cpu_data + cpu; - int siblings = 0; - int i; - if (!cpu_isset(cpu, cpu_callout_map)) - continue; - - if (smp_num_siblings > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (cpu_core_id[cpu] == cpu_core_id[i]) { - siblings++; - cpu_set(i, cpu_sibling_map[cpu]); - } - } - } else { - siblings++; - cpu_set(cpu, cpu_sibling_map[cpu]); - } - - if (siblings != smp_num_siblings) { - printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings); - smp_num_siblings = siblings; - } - - if (c->x86_num_cores > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (phys_proc_id[cpu] == phys_proc_id[i]) { - cpu_set(i, cpu_core_map[cpu]); - } - } - } else { - cpu_core_map[cpu] = cpu_sibling_map[cpu]; - } - } + cpu_set(0, cpu_sibling_map[0]); + cpu_set(0, cpu_core_map[0]); 
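smp_prepare_cpu() above packages the warm-boot request, queues it as work, and blocks on a completion until do_warm_boot_cpu() has run. The same hand-off-and-wait pattern in stand-alone form (a pthread thread plays the role of the workqueue; names are illustrative):

#include <pthread.h>

struct warm_boot_req {
    int apicid, cpu;
    pthread_mutex_t lock;
    pthread_cond_t  done_cv;
    int done;
};

static void *boot_worker(void *p)             /* the workqueue side */
{
    struct warm_boot_req *req = p;
    /* ... do_boot_cpu(req->apicid, req->cpu) would run here ... */
    pthread_mutex_lock(&req->lock);
    req->done = 1;                            /* complete(info->complete) */
    pthread_cond_signal(&req->done_cv);
    pthread_mutex_unlock(&req->lock);
    return NULL;
}

static int prepare_cpu_model(int apicid, int cpu)
{
    struct warm_boot_req req = { .apicid = apicid, .cpu = cpu, .done = 0 };
    pthread_t t;

    pthread_mutex_init(&req.lock, NULL);
    pthread_cond_init(&req.done_cv, NULL);
    pthread_create(&t, NULL, boot_worker, &req);

    pthread_mutex_lock(&req.lock);            /* wait_for_completion(&done) */
    while (!req.done)
        pthread_cond_wait(&req.done_cv, &req.lock);
    pthread_mutex_unlock(&req.lock);

    pthread_join(t, NULL);
    return 0;
}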
smpboot_setup_io_apic(); @@ -1119,6 +1254,9 @@ static void __init smp_boot_cpus(unsigned int max_cpus) who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ void __init smp_prepare_cpus(unsigned int max_cpus) { + smp_commenced_mask = cpumask_of_cpu(0); + cpu_callin_map = cpumask_of_cpu(0); + mb(); smp_boot_cpus(max_cpus); } @@ -1126,23 +1264,98 @@ void __devinit smp_prepare_boot_cpu(void) { cpu_set(smp_processor_id(), cpu_online_map); cpu_set(smp_processor_id(), cpu_callout_map); + cpu_set(smp_processor_id(), cpu_present_map); + per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; } -int __devinit __cpu_up(unsigned int cpu) +#ifdef CONFIG_HOTPLUG_CPU +static void +remove_siblinginfo(int cpu) { - /* This only works at boot for x86. See "rewrite" above. */ - if (cpu_isset(cpu, smp_commenced_mask)) { - local_irq_enable(); - return -ENOSYS; + int sibling; + + for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) + cpu_clear(cpu, cpu_sibling_map[sibling]); + for_each_cpu_mask(sibling, cpu_core_map[cpu]) + cpu_clear(cpu, cpu_core_map[sibling]); + cpus_clear(cpu_sibling_map[cpu]); + cpus_clear(cpu_core_map[cpu]); + phys_proc_id[cpu] = BAD_APICID; + cpu_core_id[cpu] = BAD_APICID; +} + +int __cpu_disable(void) +{ + cpumask_t map = cpu_online_map; + int cpu = smp_processor_id(); + + /* + * Perhaps use cpufreq to drop frequency, but that could go + * into generic code. + * + * We won't take down the boot processor on i386 due to some + * interrupts only being able to be serviced by the BSP. + * Especially so if we're not using an IOAPIC -zwane + */ + if (cpu == 0) + return -EBUSY; + + /* We enable the timer again on the exit path of the death loop */ + disable_APIC_timer(); + /* Allow any queued timer interrupts to get serviced */ + local_irq_enable(); + mdelay(1); + local_irq_disable(); + + remove_siblinginfo(cpu); + + cpu_clear(cpu, map); + fixup_irqs(map); + /* It's now safe to remove this processor from the online map */ + cpu_clear(cpu, cpu_online_map); + return 0; +} + +void __cpu_die(unsigned int cpu) +{ + /* We don't do anything here: idle task is faking death itself. */ + unsigned int i; + + for (i = 0; i < 10; i++) { + /* They ack this in play_dead by setting CPU_DEAD */ + if (per_cpu(cpu_state, cpu) == CPU_DEAD) { + printk ("CPU %d is now offline\n", cpu); + return; + } + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ/10); } + printk(KERN_ERR "CPU %u didn't die...\n", cpu); +} +#else /* ... !CONFIG_HOTPLUG_CPU */ +int __cpu_disable(void) +{ + return -ENOSYS; +} + +void __cpu_die(unsigned int cpu) +{ + /* We said "no" in __cpu_disable */ + BUG(); +} +#endif /* CONFIG_HOTPLUG_CPU */ +int __devinit __cpu_up(unsigned int cpu) +{ /* In case one didn't come up */ if (!cpu_isset(cpu, cpu_callin_map)) { + printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu); local_irq_enable(); return -EIO; } local_irq_enable(); + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; /* Unleash the CPU! 
*/ cpu_set(cpu, smp_commenced_mask); while (!cpu_isset(cpu, cpu_online_map)) @@ -1156,10 +1369,12 @@ void __init smp_cpus_done(unsigned int max_cpus) setup_ioapic_dest(); #endif zap_low_mappings(); +#ifndef CONFIG_HOTPLUG_CPU /* * Disable executability of the SMP trampoline: */ set_kernel_exec((unsigned long)trampoline_base, trampoline_exec); +#endif } void __init smp_intr_init(void) diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index d408afaf6495..442a6e937b19 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -283,7 +283,7 @@ ENTRY(sys_call_table) .long sys_mq_timedreceive /* 280 */ .long sys_mq_notify .long sys_mq_getsetattr - .long sys_ni_syscall /* reserved for kexec */ + .long sys_kexec_load .long sys_waitid .long sys_ni_syscall /* 285 */ /* available */ .long sys_add_key diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index 960d8bd137d0..0bada1870bdf 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -21,11 +21,16 @@ extern asmlinkage void sysenter_entry(void); -void enable_sep_cpu(void *info) +void enable_sep_cpu(void) { int cpu = get_cpu(); struct tss_struct *tss = &per_cpu(init_tss, cpu); + if (!boot_cpu_has(X86_FEATURE_SEP)) { + put_cpu(); + return; + } + tss->ss1 = __KERNEL_CS; tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); @@ -41,7 +46,7 @@ void enable_sep_cpu(void *info) extern const char vsyscall_int80_start, vsyscall_int80_end; extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; -static int __init sysenter_setup(void) +int __init sysenter_setup(void) { void *page = (void *)get_zeroed_page(GFP_ATOMIC); @@ -58,8 +63,5 @@ static int __init sysenter_setup(void) &vsyscall_sysenter_start, &vsyscall_sysenter_end - &vsyscall_sysenter_start); - on_each_cpu(enable_sep_cpu, NULL, 1, 1); return 0; } - -__initcall(sysenter_setup); diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c index 10a0cbb88e75..658c0629ba6a 100644 --- a/arch/i386/kernel/time_hpet.c +++ b/arch/i386/kernel/time_hpet.c @@ -50,7 +50,7 @@ static void hpet_writel(unsigned long d, unsigned long a) * comparator value and continue. Next tick can be caught by checking * for a change in the comparator value. Used in apic.c. 
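The __cpu_die() loop above is one half of a simple handshake: the dying CPU marks itself CPU_DEAD from its parking loop, and the initiating CPU polls that state roughly ten times before giving up. A stand-alone model of the handshake (the CPU_DEAD value and sleep interval are illustrative):

#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

#define CPU_DEAD 4

static _Atomic int cpu_state;

static void dying_cpu(void)                 /* runs on the CPU going away */
{
    /* ... flush state, disable the local APIC timer ... */
    atomic_store(&cpu_state, CPU_DEAD);     /* acknowledge the offline request */
    for (;;)
        ;                                   /* real code halts here */
}

static void wait_for_death(int cpu)         /* runs on the requesting CPU */
{
    for (int i = 0; i < 10; i++) {
        if (atomic_load(&cpu_state) == CPU_DEAD) {
            printf("CPU %d is now offline\n", cpu);
            return;
        }
        usleep(100 * 1000);                 /* schedule_timeout(HZ/10) */
    }
    fprintf(stderr, "CPU %u didn't die...\n", cpu);
}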
*/ -static void __init wait_hpet_tick(void) +static void __devinit wait_hpet_tick(void) { unsigned int start_cmp_val, end_cmp_val; diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c index 37353bd31803..8163fe0cf1f0 100644 --- a/arch/i386/kernel/timers/common.c +++ b/arch/i386/kernel/timers/common.c @@ -86,7 +86,7 @@ bad_ctc: #define CALIBRATE_CNT_HPET (5 * hpet_tick) #define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC) -unsigned long __init calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr) +unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr) { unsigned long tsc_startlow, tsc_starthigh; unsigned long tsc_endlow, tsc_endhigh; diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 54c36b182021..f46e625bab67 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -33,7 +33,7 @@ static struct timer_opts timer_tsc; static inline void cpufreq_delayed_get(void); -int tsc_disable __initdata = 0; +int tsc_disable __devinitdata = 0; extern spinlock_t i8253_lock; diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index e4d4e2162c7a..a61f33d06ea3 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -27,6 +27,7 @@ #include <linux/ptrace.h> #include <linux/utsname.h> #include <linux/kprobes.h> +#include <linux/kexec.h> #ifdef CONFIG_EISA #include <linux/ioport.h> @@ -234,22 +235,22 @@ void show_registers(struct pt_regs *regs) * time of the fault.. */ if (in_kernel) { - u8 *eip; + u8 __user *eip; printk("\nStack: "); show_stack(NULL, (unsigned long*)esp); printk("Code: "); - eip = (u8 *)regs->eip - 43; + eip = (u8 __user *)regs->eip - 43; for (i = 0; i < 64; i++, eip++) { unsigned char c; - if (eip < (u8 *)PAGE_OFFSET || __get_user(c, eip)) { + if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { printk(" Bad EIP value."); break; } - if (eip == (u8 *)regs->eip) + if (eip == (u8 __user *)regs->eip) printk("<%02x> ", c); else printk("%02x ", c); @@ -273,13 +274,13 @@ static void handle_BUG(struct pt_regs *regs) if (eip < PAGE_OFFSET) goto no_bug; - if (__get_user(ud2, (unsigned short *)eip)) + if (__get_user(ud2, (unsigned short __user *)eip)) goto no_bug; if (ud2 != 0x0b0f) goto no_bug; - if (__get_user(line, (unsigned short *)(eip + 2))) + if (__get_user(line, (unsigned short __user *)(eip + 2))) goto bug; - if (__get_user(file, (char **)(eip + 4)) || + if (__get_user(file, (char * __user *)(eip + 4)) || (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) file = "<bad filename>"; @@ -294,6 +295,9 @@ bug: printk("Kernel BUG\n"); } +/* This is gone through when something in the kernel + * has done something bad and is about to be terminated. 
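For context on handle_BUG() above: on this i386 kernel a BUG() site is the two-byte ud2 opcode followed by a 16-bit line number and a pointer to the file name string, which is what the __get_user() calls read back. A stand-alone decoder sketch, assuming a little-endian host and using a host-sized pointer in place of the kernel's 32-bit one:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct bug_record { uint16_t ud2, line; const char *file; };

static int decode_bug(const unsigned char *eip, struct bug_record *out)
{
    memcpy(&out->ud2, eip, 2);
    if (out->ud2 != 0x0b0f)                  /* 0x0f 0x0b read little-endian */
        return -1;                           /* not a BUG() site */
    memcpy(&out->line, eip + 2, 2);
    memcpy(&out->file, eip + 4, sizeof(out->file));
    return 0;
}

int main(void)
{
    static const char *fname = "kernel/sched.c";       /* illustrative */
    unsigned char site[2 + 2 + sizeof(const char *)] = { 0x0f, 0x0b, 42, 0 };
    struct bug_record rec;

    memcpy(site + 4, &fname, sizeof(fname));
    if (!decode_bug(site, &rec))
        printf("kernel BUG at %s:%d!\n", rec.file, rec.line);
    return 0;
}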
+*/ void die(const char * str, struct pt_regs * regs, long err) { static struct { @@ -341,6 +345,10 @@ void die(const char * str, struct pt_regs * regs, long err) bust_spinlocks(0); die.lock_owner = -1; spin_unlock_irq(&die.lock); + + if (kexec_should_crash(current)) + crash_kexec(regs); + if (in_interrupt()) panic("Fatal exception in interrupt"); @@ -361,6 +369,10 @@ static inline void die_if_kernel(const char * str, struct pt_regs * regs, long e static void do_trap(int trapnr, int signr, char *str, int vm86, struct pt_regs * regs, long error_code, siginfo_t *info) { + struct task_struct *tsk = current; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = trapnr; + if (regs->eflags & VM_MASK) { if (vm86) goto vm86_trap; @@ -371,9 +383,6 @@ static void do_trap(int trapnr, int signr, char *str, int vm86, goto kernel_trap; trap_signal: { - struct task_struct *tsk = current; - tsk->thread.error_code = error_code; - tsk->thread.trap_no = trapnr; if (info) force_sig_info(signr, info, tsk); else @@ -486,6 +495,9 @@ fastcall void do_general_protection(struct pt_regs * regs, long error_code) } put_cpu(); + current->thread.error_code = error_code; + current->thread.trap_no = 13; + if (regs->eflags & VM_MASK) goto gp_in_vm86; @@ -570,6 +582,15 @@ void die_nmi (struct pt_regs *regs, const char *msg) console_silent(); spin_unlock(&nmi_print_lock); bust_spinlocks(0); + + /* If we are in kernel we are probably nested up pretty bad + * and might aswell get out now while we still can. + */ + if (!user_mode(regs)) { + current->thread.trap_no = 2; + crash_kexec(regs); + } + do_exit(SIGSEGV); } @@ -625,6 +646,14 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) nmi_enter(); cpu = smp_processor_id(); + +#ifdef CONFIG_HOTPLUG_CPU + if (!cpu_online(cpu)) { + nmi_exit(); + return; + } +#endif + ++nmi_count(cpu); if (!nmi_callback(regs, cpu)) @@ -872,9 +901,9 @@ fastcall void do_simd_coprocessor_error(struct pt_regs * regs, error_code); return; } - die_if_kernel("cache flush denied", regs, error_code); current->thread.trap_no = 19; current->thread.error_code = error_code; + die_if_kernel("cache flush denied", regs, error_code); force_sig(SIGSEGV, current); } } diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index e0512cc8bea7..7e01a528a83a 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -2,20 +2,23 @@ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>; */ +#define LOAD_OFFSET __PAGE_OFFSET + #include <asm-generic/vmlinux.lds.h> #include <asm/thread_info.h> #include <asm/page.h> OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) -ENTRY(startup_32) +ENTRY(phys_startup_32) jiffies = jiffies_64; SECTIONS { - . = __PAGE_OFFSET + 0x100000; + . = __KERNEL_START; + phys_startup_32 = startup_32 - LOAD_OFFSET; /* read-only */ _text = .; /* Text and read-only data */ - .text : { + .text : AT(ADDR(.text) - LOAD_OFFSET) { *(.text) SCHED_TEXT LOCK_TEXT @@ -27,49 +30,55 @@ SECTIONS . = ALIGN(16); /* Exception table */ __start___ex_table = .; - __ex_table : { *(__ex_table) } + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } __stop___ex_table = .; RODATA /* writeable */ - .data : { /* Data */ + .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ *(.data) CONSTRUCTORS } . = ALIGN(4096); __nosave_begin = .; - .data_nosave : { *(.data.nosave) } + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } . = ALIGN(4096); __nosave_end = .; . 
= ALIGN(4096); - .data.page_aligned : { *(.data.idt) } + .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { + *(.data.idt) + } . = ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } + .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { + *(.data.cacheline_aligned) + } _edata = .; /* End of data section */ . = ALIGN(THREAD_SIZE); /* init_task */ - .data.init_task : { *(.data.init_task) } + .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { + *(.data.init_task) + } /* will be freed after init */ . = ALIGN(4096); /* Init code and data */ __init_begin = .; - .init.text : { + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; *(.init.text) _einittext = .; } - .init.data : { *(.init.data) } + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) } . = ALIGN(16); __setup_start = .; - .init.setup : { *(.init.setup) } + .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) } __setup_end = .; __initcall_start = .; - .initcall.init : { + .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { *(.initcall1.init) *(.initcall2.init) *(.initcall3.init) @@ -80,33 +89,41 @@ SECTIONS } __initcall_end = .; __con_initcall_start = .; - .con_initcall.init : { *(.con_initcall.init) } + .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { + *(.con_initcall.init) + } __con_initcall_end = .; SECURITY_INIT . = ALIGN(4); __alt_instructions = .; - .altinstructions : { *(.altinstructions) } + .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + *(.altinstructions) + } __alt_instructions_end = .; - .altinstr_replacement : { *(.altinstr_replacement) } + .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { + *(.altinstr_replacement) + } /* .exit.text is discard at runtime, not link time, to deal with references from .altinstructions and .eh_frame */ - .exit.text : { *(.exit.text) } - .exit.data : { *(.exit.data) } + .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } + .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } . = ALIGN(4096); __initramfs_start = .; - .init.ramfs : { *(.init.ramfs) } + .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } __initramfs_end = .; . = ALIGN(32); __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } + .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } __per_cpu_end = .; . = ALIGN(4096); __init_end = .; /* freed after init ends here */ __bss_start = .; /* BSS */ - .bss : { + .bss.page_aligned : AT(ADDR(.bss.page_aligned) - LOAD_OFFSET) { *(.bss.page_aligned) + } + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { *(.bss) } . = ALIGN(4); diff --git a/arch/i386/mach-default/setup.c b/arch/i386/mach-default/setup.c index 0aa08eaa8932..e5a1a83d09ef 100644 --- a/arch/i386/mach-default/setup.c +++ b/arch/i386/mach-default/setup.c @@ -10,6 +10,14 @@ #include <asm/acpi.h> #include <asm/arch_hooks.h> +#ifdef CONFIG_HOTPLUG_CPU +#define DEFAULT_SEND_IPI (1) +#else +#define DEFAULT_SEND_IPI (0) +#endif + +int no_broadcast=DEFAULT_SEND_IPI; + /** * pre_intr_init_hook - initialisation prior to setting up interrupt vectors * @@ -104,3 +112,22 @@ void __init mca_nmi_hook(void) printk("NMI generated from unknown source!\n"); } #endif + +static __init int no_ipi_broadcast(char *str) +{ + get_option(&str, &no_broadcast); + printk ("Using %s mode\n", no_broadcast ? 
"No IPI Broadcast" : + "IPI Broadcast"); + return 1; +} + +__setup("no_ipi_broadcast", no_ipi_broadcast); + +static int __init print_ipi_mode(void) +{ + printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" : + "Shortcut"); + return 0; +} + +late_initcall(print_ipi_mode); diff --git a/arch/i386/mach-default/topology.c b/arch/i386/mach-default/topology.c index 5b3e8817dae8..23395fff35d1 100644 --- a/arch/i386/mach-default/topology.c +++ b/arch/i386/mach-default/topology.c @@ -73,12 +73,11 @@ static int __init topology_init(void) { int i; - for (i = 0; i < MAX_NUMNODES; i++) { - if (node_online(i)) - arch_register_node(i); - } - for (i = 0; i < NR_CPUS; i++) - if (cpu_possible(i)) arch_register_cpu(i); + for_each_online_node(i) + arch_register_node(i); + + for_each_cpu(i) + arch_register_cpu(i); return 0; } @@ -88,8 +87,8 @@ static int __init topology_init(void) { int i; - for (i = 0; i < NR_CPUS; i++) - if (cpu_possible(i)) arch_register_cpu(i); + for_each_cpu(i) + arch_register_cpu(i); return 0; } diff --git a/arch/i386/mach-visws/mpparse.c b/arch/i386/mach-visws/mpparse.c index 5a22082147f4..5f3d7e6de37b 100644 --- a/arch/i386/mach-visws/mpparse.c +++ b/arch/i386/mach-visws/mpparse.c @@ -23,7 +23,6 @@ unsigned long mp_lapic_addr; /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; -unsigned int boot_cpu_logical_apicid = -1U; /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; @@ -52,10 +51,8 @@ static void __init MP_processor_info (struct mpc_config_processor *m) (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, m->mpc_apicver); - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) boot_cpu_physical_apicid = m->mpc_apicid; - boot_cpu_logical_apicid = logical_apicid; - } ver = m->mpc_apicver; if ((ver >= 0x14 && m->mpc_apicid >= 0xff) || m->mpc_apicid >= 0xf) { diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index f429c871e845..b358f0702a44 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -30,6 +30,8 @@ #include <linux/initrd.h> #include <linux/nodemask.h> #include <linux/module.h> +#include <linux/kexec.h> + #include <asm/e820.h> #include <asm/setup.h> #include <asm/mmzone.h> diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index a509237c4815..8e90339d6eaa 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -146,7 +146,7 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr) if (instr > limit) break; - if (__get_user(opcode, (unsigned char *) instr)) + if (__get_user(opcode, (unsigned char __user *) instr)) break; instr_hi = opcode & 0xf0; @@ -173,7 +173,7 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr) scan_more = 0; if (instr > limit) break; - if (__get_user(opcode, (unsigned char *) instr)) + if (__get_user(opcode, (unsigned char __user *) instr)) break; prefetch = (instr_lo == 0xF) && (opcode == 0x0D || opcode == 0x18); @@ -463,6 +463,9 @@ no_context: printk(KERN_ALERT "*pte = %08lx\n", page); } #endif + tsk->thread.cr2 = address; + tsk->thread.trap_no = 14; + tsk->thread.error_code = error_code; die("Oops", regs, error_code); bust_spinlocks(0); do_exit(SIGKILL); diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c index 4b7aaf99d7ea..b6eb4dcb8777 100644 --- a/arch/i386/mm/highmem.c +++ b/arch/i386/mm/highmem.c @@ -75,6 +75,24 @@ void kunmap_atomic(void *kvaddr, enum km_type type) preempt_check_resched(); } +/* This is the same as kmap_atomic() but can map memory that doesn't + * have a 
struct page associated with it. + */ +void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +{ + enum fixed_addresses idx; + unsigned long vaddr; + + inc_preempt_count(); + + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); + __flush_tlb_one(vaddr); + + return (void*) vaddr; +} + struct page *kmap_atomic_to_page(void *ptr) { unsigned long idx, vaddr = (unsigned long)ptr; diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 3672e2ef51ae..12216b52e28b 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -352,7 +352,7 @@ static void __init pagetable_init (void) #endif } -#if defined(CONFIG_PM_DISK) || defined(CONFIG_SOFTWARE_SUSPEND) +#ifdef CONFIG_SOFTWARE_SUSPEND /* * Swap suspend & friends need this for resume because things like the intel-agp * driver might have split up a kernel 4MB mapping. diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c index d393eefc7052..6b25afc933b6 100644 --- a/arch/i386/mm/ioremap.c +++ b/arch/i386/mm/ioremap.c @@ -243,7 +243,7 @@ void iounmap(volatile void __iomem *addr) write_lock(&vmlist_lock); p = __remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr)); if (!p) { - printk("iounmap: bad address %p\n", addr); + printk(KERN_WARNING "iounmap: bad address %p\n", addr); goto out_unlock; } diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index 270c59f099a4..bd2f7afc7a2a 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -32,9 +32,9 @@ void show_mem(void) unsigned long i; struct page_state ps; - printk("Mem-info:\n"); + printk(KERN_INFO "Mem-info:\n"); show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat_page_nr(pgdat, i); @@ -49,18 +49,18 @@ void show_mem(void) shared += page_count(page) - 1; } } - printk("%d pages of RAM\n", total); - printk("%d pages of HIGHMEM\n",highmem); - printk("%d reserved pages\n",reserved); - printk("%d pages shared\n",shared); - printk("%d pages swap cached\n",cached); + printk(KERN_INFO "%d pages of RAM\n", total); + printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); + printk(KERN_INFO "%d reserved pages\n", reserved); + printk(KERN_INFO "%d pages shared\n", shared); + printk(KERN_INFO "%d pages swap cached\n", cached); get_page_state(&ps); - printk("%lu pages dirty\n", ps.nr_dirty); - printk("%lu pages writeback\n", ps.nr_writeback); - printk("%lu pages mapped\n", ps.nr_mapped); - printk("%lu pages slab\n", ps.nr_slab); - printk("%lu pages pagetables\n", ps.nr_page_table_pages); + printk(KERN_INFO "%lu pages dirty\n", ps.nr_dirty); + printk(KERN_INFO "%lu pages writeback\n", ps.nr_writeback); + printk(KERN_INFO "%lu pages mapped\n", ps.nr_mapped); + printk(KERN_INFO "%lu pages slab\n", ps.nr_slab); + printk(KERN_INFO "%lu pages pagetables\n", ps.nr_page_table_pages); } /* @@ -113,16 +113,16 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) pmd_t *pmd; if (vaddr & (PMD_SIZE-1)) { /* vaddr is misaligned */ - printk ("set_pmd_pfn: vaddr misaligned\n"); + printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n"); return; /* BUG(); */ } if (pfn & (PTRS_PER_PTE-1)) { /* pfn is misaligned */ - printk ("set_pmd_pfn: pfn misaligned\n"); + printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n"); return; /* BUG(); */ } pgd = swapper_pg_dir + pgd_index(vaddr); if 
(pgd_none(*pgd)) { - printk ("set_pmd_pfn: pgd_none\n"); + printk(KERN_WARNING "set_pmd_pfn: pgd_none\n"); return; /* BUG(); */ } pud = pud_offset(pgd, vaddr); diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c index 6f521cf19a13..0e6b45b61251 100644 --- a/arch/i386/power/cpu.c +++ b/arch/i386/power/cpu.c @@ -22,9 +22,11 @@ #include <linux/device.h> #include <linux/suspend.h> #include <linux/acpi.h> + #include <asm/uaccess.h> #include <asm/acpi.h> #include <asm/tlbflush.h> +#include <asm/processor.h> static struct saved_context saved_context; @@ -33,8 +35,6 @@ unsigned long saved_context_esp, saved_context_ebp; unsigned long saved_context_esi, saved_context_edi; unsigned long saved_context_eflags; -extern void enable_sep_cpu(void *); - void __save_processor_state(struct saved_context *ctxt) { kernel_fpu_begin(); @@ -44,7 +44,6 @@ void __save_processor_state(struct saved_context *ctxt) */ asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit)); asm volatile ("sidt %0" : "=m" (ctxt->idt_limit)); - asm volatile ("sldt %0" : "=m" (ctxt->ldt)); asm volatile ("str %0" : "=m" (ctxt->tr)); /* @@ -107,7 +106,6 @@ static void fix_processor_context(void) void __restore_processor_state(struct saved_context *ctxt) { - /* * control registers */ @@ -117,6 +115,13 @@ void __restore_processor_state(struct saved_context *ctxt) asm volatile ("movl %0, %%cr0" :: "r" (ctxt->cr0)); /* + * now restore the descriptor tables to their proper values + * ltr is done i fix_processor_context(). + */ + asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit)); + asm volatile ("lidt %0" :: "m" (ctxt->idt_limit)); + + /* * segment registers */ asm volatile ("movw %0, %%es" :: "r" (ctxt->es)); @@ -125,18 +130,10 @@ void __restore_processor_state(struct saved_context *ctxt) asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss)); /* - * now restore the descriptor tables to their proper values - * ltr is done i fix_processor_context(). - */ - asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit)); - asm volatile ("lidt %0" :: "m" (ctxt->idt_limit)); - asm volatile ("lldt %0" :: "m" (ctxt->ldt)); - - /* * sysenter MSRs */ if (boot_cpu_has(X86_FEATURE_SEP)) - enable_sep_cpu(NULL); + enable_sep_cpu(); fix_processor_context(); do_fpu_end(); diff --git a/arch/ia64/kernel/domain.c b/arch/ia64/kernel/domain.c index fe532c970438..d65e87b6394f 100644 --- a/arch/ia64/kernel/domain.c +++ b/arch/ia64/kernel/domain.c @@ -14,7 +14,7 @@ #include <linux/topology.h> #include <linux/nodemask.h> -#define SD_NODES_PER_DOMAIN 6 +#define SD_NODES_PER_DOMAIN 16 #ifdef CONFIG_NUMA /** @@ -27,7 +27,7 @@ * * Should use nodemask_t. */ -static int __devinit find_next_best_node(int node, unsigned long *used_nodes) +static int find_next_best_node(int node, unsigned long *used_nodes) { int i, n, val, min_val, best_node = 0; @@ -66,7 +66,7 @@ static int __devinit find_next_best_node(int node, unsigned long *used_nodes) * should be one that prevents unnecessary balancing, but also spreads tasks * out optimally. 
*/ -static cpumask_t __devinit sched_domain_node_span(int node) +static cpumask_t sched_domain_node_span(int node) { int i; cpumask_t span, nodemask; @@ -96,7 +96,7 @@ static cpumask_t __devinit sched_domain_node_span(int node) #ifdef CONFIG_SCHED_SMT static DEFINE_PER_CPU(struct sched_domain, cpu_domains); static struct sched_group sched_group_cpus[NR_CPUS]; -static int __devinit cpu_to_cpu_group(int cpu) +static int cpu_to_cpu_group(int cpu) { return cpu; } @@ -104,7 +104,7 @@ static int __devinit cpu_to_cpu_group(int cpu) static DEFINE_PER_CPU(struct sched_domain, phys_domains); static struct sched_group sched_group_phys[NR_CPUS]; -static int __devinit cpu_to_phys_group(int cpu) +static int cpu_to_phys_group(int cpu) { #ifdef CONFIG_SCHED_SMT return first_cpu(cpu_sibling_map[cpu]); @@ -125,44 +125,36 @@ static struct sched_group *sched_group_nodes[MAX_NUMNODES]; static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); static struct sched_group sched_group_allnodes[MAX_NUMNODES]; -static int __devinit cpu_to_allnodes_group(int cpu) +static int cpu_to_allnodes_group(int cpu) { return cpu_to_node(cpu); } #endif /* - * Set up scheduler domains and groups. Callers must hold the hotplug lock. + * Build sched domains for a given set of cpus and attach the sched domains + * to the individual cpus */ -void __devinit arch_init_sched_domains(void) +void build_sched_domains(const cpumask_t *cpu_map) { int i; - cpumask_t cpu_default_map; /* - * Setup mask for cpus without special case scheduling requirements. - * For now this just excludes isolated cpus, but could be used to - * exclude other special cases in the future. + * Set up domains for cpus specified by the cpu_map. */ - cpus_complement(cpu_default_map, cpu_isolated_map); - cpus_and(cpu_default_map, cpu_default_map, cpu_online_map); - - /* - * Set up domains. Isolated domains just stay on the dummy domain. 
- */ - for_each_cpu_mask(i, cpu_default_map) { + for_each_cpu_mask(i, *cpu_map) { int group; struct sched_domain *sd = NULL, *p; cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); - cpus_and(nodemask, nodemask, cpu_default_map); + cpus_and(nodemask, nodemask, *cpu_map); #ifdef CONFIG_NUMA if (num_online_cpus() > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { sd = &per_cpu(allnodes_domains, i); *sd = SD_ALLNODES_INIT; - sd->span = cpu_default_map; + sd->span = *cpu_map; group = cpu_to_allnodes_group(i); sd->groups = &sched_group_allnodes[group]; p = sd; @@ -173,7 +165,7 @@ void __devinit arch_init_sched_domains(void) *sd = SD_NODE_INIT; sd->span = sched_domain_node_span(cpu_to_node(i)); sd->parent = p; - cpus_and(sd->span, sd->span, cpu_default_map); + cpus_and(sd->span, sd->span, *cpu_map); #endif p = sd; @@ -190,7 +182,7 @@ void __devinit arch_init_sched_domains(void) group = cpu_to_cpu_group(i); *sd = SD_SIBLING_INIT; sd->span = cpu_sibling_map[i]; - cpus_and(sd->span, sd->span, cpu_default_map); + cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; sd->groups = &sched_group_cpus[group]; #endif @@ -198,9 +190,9 @@ void __devinit arch_init_sched_domains(void) #ifdef CONFIG_SCHED_SMT /* Set up CPU (sibling) groups */ - for_each_cpu_mask(i, cpu_default_map) { + for_each_cpu_mask(i, *cpu_map) { cpumask_t this_sibling_map = cpu_sibling_map[i]; - cpus_and(this_sibling_map, this_sibling_map, cpu_default_map); + cpus_and(this_sibling_map, this_sibling_map, *cpu_map); if (i != first_cpu(this_sibling_map)) continue; @@ -213,7 +205,7 @@ void __devinit arch_init_sched_domains(void) for (i = 0; i < MAX_NUMNODES; i++) { cpumask_t nodemask = node_to_cpumask(i); - cpus_and(nodemask, nodemask, cpu_default_map); + cpus_and(nodemask, nodemask, *cpu_map); if (cpus_empty(nodemask)) continue; @@ -222,7 +214,7 @@ void __devinit arch_init_sched_domains(void) } #ifdef CONFIG_NUMA - init_sched_build_groups(sched_group_allnodes, cpu_default_map, + init_sched_build_groups(sched_group_allnodes, *cpu_map, &cpu_to_allnodes_group); for (i = 0; i < MAX_NUMNODES; i++) { @@ -233,12 +225,12 @@ void __devinit arch_init_sched_domains(void) cpumask_t covered = CPU_MASK_NONE; int j; - cpus_and(nodemask, nodemask, cpu_default_map); + cpus_and(nodemask, nodemask, *cpu_map); if (cpus_empty(nodemask)) continue; domainspan = sched_domain_node_span(i); - cpus_and(domainspan, domainspan, cpu_default_map); + cpus_and(domainspan, domainspan, *cpu_map); sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); sched_group_nodes[i] = sg; @@ -266,7 +258,7 @@ void __devinit arch_init_sched_domains(void) int n = (i + j) % MAX_NUMNODES; cpus_complement(notcovered, covered); - cpus_and(tmp, notcovered, cpu_default_map); + cpus_and(tmp, notcovered, *cpu_map); cpus_and(tmp, tmp, domainspan); if (cpus_empty(tmp)) break; @@ -293,7 +285,7 @@ void __devinit arch_init_sched_domains(void) #endif /* Calculate CPU power for physical packages and nodes */ - for_each_cpu_mask(i, cpu_default_map) { + for_each_cpu_mask(i, *cpu_map) { int power; struct sched_domain *sd; #ifdef CONFIG_SCHED_SMT @@ -359,13 +351,35 @@ next_sg: cpu_attach_domain(sd, i); } } +/* + * Set up scheduler domains and groups. Callers must hold the hotplug lock. + */ +void arch_init_sched_domains(const cpumask_t *cpu_map) +{ + cpumask_t cpu_default_map; + + /* + * Setup mask for cpus without special case scheduling requirements. + * For now this just excludes isolated cpus, but could be used to + * exclude other special cases in the future. 
+ */ + cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map); + + build_sched_domains(&cpu_default_map); +} -void __devinit arch_destroy_sched_domains(void) +void arch_destroy_sched_domains(const cpumask_t *cpu_map) { #ifdef CONFIG_NUMA int i; for (i = 0; i < MAX_NUMNODES; i++) { + cpumask_t nodemask = node_to_cpumask(i); struct sched_group *oldsg, *sg = sched_group_nodes[i]; + + cpus_and(nodemask, nodemask, *cpu_map); + if (cpus_empty(nodemask)) + continue; + if (sg == NULL) continue; sg = sg->next; diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 3865f088ffa2..623b0a546709 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -346,6 +346,7 @@ smp_callin (void) lock_ipi_calllock(); cpu_set(cpuid, cpu_online_map); unlock_ipi_calllock(); + per_cpu(cpu_state, cpuid) = CPU_ONLINE; smp_setup_percpu_timer(); @@ -611,6 +612,7 @@ void __devinit smp_prepare_boot_cpu(void) { cpu_set(smp_processor_id(), cpu_online_map); cpu_set(smp_processor_id(), cpu_callin_map); + per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; } /* @@ -688,6 +690,7 @@ int __cpu_disable(void) return -EBUSY; remove_siblinginfo(cpu); + cpu_clear(cpu, cpu_online_map); fixup_irqs(); local_flush_tlb_all(); cpu_clear(cpu, cpu_callin_map); @@ -774,6 +777,7 @@ __cpu_up (unsigned int cpu) if (cpu_isset(cpu, cpu_callin_map)) return -EINVAL; + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; /* Processor goes to start_secondary(), sets online flag */ ret = do_boot_cpu(sapicid, cpu); if (ret < 0) diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h index 1a0aed8490d1..d0ee635daf2e 100644 --- a/arch/ia64/sn/kernel/xpc.h +++ b/arch/ia64/sn/kernel/xpc.h @@ -87,7 +87,7 @@ struct xpc_rsvd_page { u8 partid; /* partition ID from SAL */ u8 version; u8 pad[6]; /* pad to u64 align */ - u64 vars_pa; + volatile u64 vars_pa; u64 part_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned; u64 mach_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned; }; @@ -138,7 +138,7 @@ struct xpc_vars { * occupies half a cacheline. */ struct xpc_vars_part { - u64 magic; + volatile u64 magic; u64 openclose_args_pa; /* physical address of open and close args */ u64 GPs_pa; /* physical address of Get/Put values */ @@ -185,8 +185,8 @@ struct xpc_vars_part { * Define a Get/Put value pair (pointers) used with a message queue. 
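arch_init_sched_domains() above now just strips the isolated CPUs from the requested map and hands the result to build_sched_domains(). A sketch of that cpumask step with plain bitmasks (the isolcpus value is illustrative):

#include <stdint.h>

typedef uint64_t cpumask_model_t;

static cpumask_model_t cpu_isolated = 1ull << 3;      /* e.g. isolcpus=3 */

static void init_sched_domains_model(cpumask_model_t cpu_map)
{
    /* cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map) */
    cpumask_model_t cpu_default_map = cpu_map & ~cpu_isolated;

    /* build_sched_domains(&cpu_default_map) would then build domains only
     * for the CPUs that remain set here */
    (void)cpu_default_map;
}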
*/ struct xpc_gp { - s64 get; /* Get value */ - s64 put; /* Put value */ + volatile s64 get; /* Get value */ + volatile s64 put; /* Put value */ }; #define XPC_GP_SIZE \ @@ -231,7 +231,7 @@ struct xpc_openclose_args { */ struct xpc_notify { struct semaphore sema; /* notify semaphore */ - u8 type; /* type of notification */ + volatile u8 type; /* type of notification */ /* the following two fields are only used if type == XPC_N_CALL */ xpc_notify_func func; /* user's notify function */ @@ -439,7 +439,7 @@ struct xpc_partition { /* XPC infrastructure referencing and teardown control */ - u8 setup_state; /* infrastructure setup state */ + volatile u8 setup_state; /* infrastructure setup state */ wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */ atomic_t references; /* #of references to infrastructure */ diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c index 0bf6fbcc46d2..6d02dac8056f 100644 --- a/arch/ia64/sn/kernel/xpc_channel.c +++ b/arch/ia64/sn/kernel/xpc_channel.c @@ -209,7 +209,7 @@ xpc_setup_infrastructure(struct xpc_partition *part) * With the setting of the partition setup_state to XPC_P_SETUP, we're * declaring that this partition is ready to go. */ - (volatile u8) part->setup_state = XPC_P_SETUP; + part->setup_state = XPC_P_SETUP; /* @@ -227,7 +227,7 @@ xpc_setup_infrastructure(struct xpc_partition *part) xpc_vars_part[partid].IPI_phys_cpuid = cpu_physical_id(smp_processor_id()); xpc_vars_part[partid].nchannels = part->nchannels; - (volatile u64) xpc_vars_part[partid].magic = XPC_VP_MAGIC1; + xpc_vars_part[partid].magic = XPC_VP_MAGIC1; return xpcSuccess; } @@ -355,7 +355,7 @@ xpc_pull_remote_vars_part(struct xpc_partition *part) /* let the other side know that we've pulled their variables */ - (volatile u64) xpc_vars_part[partid].magic = XPC_VP_MAGIC2; + xpc_vars_part[partid].magic = XPC_VP_MAGIC2; } if (pulled_entry->magic == XPC_VP_MAGIC1) { @@ -1183,7 +1183,7 @@ xpc_process_msg_IPI(struct xpc_partition *part, int ch_number) */ xpc_clear_local_msgqueue_flags(ch); - (volatile s64) ch->w_remote_GP.get = ch->remote_GP.get; + ch->w_remote_GP.get = ch->remote_GP.get; dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, " "channel=%d\n", ch->w_remote_GP.get, ch->partid, @@ -1211,7 +1211,7 @@ xpc_process_msg_IPI(struct xpc_partition *part, int ch_number) */ xpc_clear_remote_msgqueue_flags(ch); - (volatile s64) ch->w_remote_GP.put = ch->remote_GP.put; + ch->w_remote_GP.put = ch->remote_GP.put; dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, " "channel=%d\n", ch->w_remote_GP.put, ch->partid, @@ -1875,7 +1875,7 @@ xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type, notify = &ch->notify_queue[msg_number % ch->local_nentries]; notify->func = func; notify->key = key; - (volatile u8) notify->type = notify_type; + notify->type = notify_type; // >>> is a mb() needed here? diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c index cd7ed73f0e7a..578265ea9e67 100644 --- a/arch/ia64/sn/kernel/xpc_partition.c +++ b/arch/ia64/sn/kernel/xpc_partition.c @@ -253,7 +253,7 @@ xpc_rsvd_page_init(void) * This signifies to the remote partition that our reserved * page is initialized. 
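On the xpc changes above: a statement such as "(volatile u64) x = val;" relies on the old GCC cast-as-lvalue extension, and the qualifier on the cast does not make the store itself volatile, which is presumably why the patch marks the members volatile instead. A minimal sketch of the resulting access pattern:

#include <stdint.h>

struct xpc_gp_model {
    volatile int64_t get;    /* every access through this member is volatile */
    volatile int64_t put;
};

static void advance_put(struct xpc_gp_model *gp, int64_t new_put)
{
    gp->put = new_put;       /* a volatile store the compiler must emit as written */
}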
*/ - (volatile u64) rp->vars_pa = __pa(xpc_vars); + rp->vars_pa = __pa(xpc_vars); return rp; } diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 94f5a8eb2c22..bd9de7b00c0a 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1416,6 +1416,12 @@ config HIGHMEM bool "High Memory Support" depends on MIPS32 && (CPU_R3000 || CPU_SB1 || CPU_R7000 || CPU_RM9000 || CPU_R10000) && !(MACH_DECSTATION || MOMENCO_JAGUAR_ATX) +config ARCH_FLATMEM_ENABLE + def_bool y + depends on !NUMA + +source "mm/Kconfig" + config SMP bool "Multi-Processing support" depends on CPU_RM9000 || (SIBYTE_SB1250 && !SIBYTE_STANDALONE) || SGI_IP27 diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 6018ca25aceb..3a240e3e004c 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -33,6 +33,7 @@ #include <linux/root_dev.h> #include <linux/highmem.h> #include <linux/console.h> +#include <linux/mmzone.h> #include <asm/addrspace.h> #include <asm/bootinfo.h> @@ -356,6 +357,8 @@ static inline void bootmem_init(void) } #endif + memory_present(0, first_usable_pfn, max_low_pfn); + /* Initialize the boot-time allocator with low memory only. */ bootmap_size = init_bootmem(first_usable_pfn, max_low_pfn); @@ -557,6 +560,7 @@ void __init setup_arch(char **cmdline_p) parse_cmdline_early(); bootmem_init(); + sparse_init(); paging_init(); resource_init(); } diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 73843c528778..9c9a271c8a3a 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -128,7 +128,7 @@ static void __init fixrange_init(unsigned long start, unsigned long end, #endif /* CONFIG_MIPS64 */ #endif /* CONFIG_HIGHMEM */ -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NEED_MULTIPLE_NODES extern void pagetable_init(void); void __init paging_init(void) @@ -253,7 +253,7 @@ void __init mem_init(void) initsize >> 10, (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); } -#endif /* !CONFIG_DISCONTIGMEM */ +#endif /* !CONFIG_NEED_MULTIPLE_NODES */ #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) diff --git a/arch/mips/mm/pgtable.c b/arch/mips/mm/pgtable.c index 3b88fdeef329..3fe94202da8c 100644 --- a/arch/mips/mm/pgtable.c +++ b/arch/mips/mm/pgtable.c @@ -5,7 +5,7 @@ void show_mem(void) { -#ifndef CONFIG_DISCONTIGMEM /* XXX(hch): later.. */ +#ifndef CONFIG_NEED_MULTIPLE_NODES /* XXX(hch): later.. */ int pfn, total = 0, reserved = 0; int shared = 0, cached = 0; int highmem = 0; diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig index 848f43970a4b..a7835cd3f51f 100644 --- a/arch/ppc/Kconfig +++ b/arch/ppc/Kconfig @@ -88,6 +88,9 @@ config 8xx depends on BROKEN bool "8xx" +config E200 + bool "e200" + config E500 bool "e500" @@ -98,12 +101,12 @@ config PPC_FPU config BOOKE bool - depends on E500 + depends on E200 || E500 default y config FSL_BOOKE bool - depends on E500 + depends on E200 || E500 default y config PTE_64BIT @@ -141,16 +144,16 @@ config ALTIVEC config SPE bool "SPE Support" - depends on E500 + depends on E200 || E500 ---help--- This option enables kernel support for the Signal Processing Extensions (SPE) to the PowerPC processor. The kernel currently supports saving and restoring SPE registers, and turning on the 'spe enable' bit so user processes can execute SPE instructions. 
- This option is only usefully if you have a processor that supports + This option is only useful if you have a processor that supports SPE (e500, otherwise known as 85xx series), but does not have any - affect on a non-spe cpu (it does, however add code to the kernel). + effect on a non-spe cpu (it does, however add code to the kernel). If in doubt, say Y here. @@ -200,7 +203,7 @@ config TAU_AVERAGE config MATH_EMULATION bool "Math emulation" - depends on 4xx || 8xx || E500 + depends on 4xx || 8xx || E200 || E500 ---help--- Some PowerPC chips designed for embedded applications do not have a floating-point unit and therefore do not implement the @@ -214,6 +217,26 @@ config MATH_EMULATION here. Saying Y here will not hurt performance (on any machine) but will increase the size of the kernel. +config KEXEC + bool "kexec system call (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shutdown, so do not be surprised if this code does not + initially work for you. It may help to enable device hotplugging + support. As of this writing the exact hardware interface is + strongly in flux, so no good recommendation can be made. + + In the GameCube implementation, kexec allows you to load and + run DOL files, including kernel and homebrew DOLs. + source "drivers/cpufreq/Kconfig" config CPU_FREQ_PMAC @@ -254,7 +277,7 @@ config PPC_STD_MMU config NOT_COHERENT_CACHE bool - depends on 4xx || 8xx + depends on 4xx || 8xx || E200 default y endmenu diff --git a/arch/ppc/Kconfig.debug b/arch/ppc/Kconfig.debug index d2e1eea8e8e4..e16c7710d4be 100644 --- a/arch/ppc/Kconfig.debug +++ b/arch/ppc/Kconfig.debug @@ -66,7 +66,7 @@ config SERIAL_TEXT_DEBUG config PPC_OCP bool - depends on IBM_OCP || FSL_OCP || XILINX_OCP + depends on IBM_OCP || XILINX_OCP default y endmenu diff --git a/arch/ppc/Makefile b/arch/ppc/Makefile index 0432a25b4735..f9b0d778dd82 100644 --- a/arch/ppc/Makefile +++ b/arch/ppc/Makefile @@ -29,7 +29,7 @@ CPP = $(CC) -E $(CFLAGS) CHECKFLAGS += -D__powerpc__ -ifndef CONFIG_E500 +ifndef CONFIG_FSL_BOOKE CFLAGS += -mstring endif @@ -38,6 +38,7 @@ cpu-as-$(CONFIG_4xx) += -Wa,-m405 cpu-as-$(CONFIG_6xx) += -Wa,-maltivec cpu-as-$(CONFIG_POWER4) += -Wa,-maltivec cpu-as-$(CONFIG_E500) += -Wa,-me500 +cpu-as-$(CONFIG_E200) += -Wa,-me200 AFLAGS += $(cpu-as-y) CFLAGS += $(cpu-as-y) diff --git a/arch/ppc/boot/openfirmware/chrpmain.c b/arch/ppc/boot/openfirmware/chrpmain.c index 6fb4f738728c..effe4a0624b0 100644 --- a/arch/ppc/boot/openfirmware/chrpmain.c +++ b/arch/ppc/boot/openfirmware/chrpmain.c @@ -39,7 +39,7 @@ char *avail_high; #define SCRATCH_SIZE (128 << 10) -static char scratch[SCRATCH_SIZE]; /* 1MB of scratch space for gunzip */ +static char scratch[SCRATCH_SIZE]; /* 128k of scratch space for gunzip */ typedef void (*kernel_start_t)(int, int, void *, unsigned int, unsigned int); diff --git a/arch/ppc/kernel/Makefile b/arch/ppc/kernel/Makefile index b284451802c9..b1457a8a9c0f 100644 --- a/arch/ppc/kernel/Makefile +++ b/arch/ppc/kernel/Makefile @@ -26,7 +26,10 @@ obj-$(CONFIG_KGDB) += ppc-stub.o obj-$(CONFIG_SMP) += smp.o smp-tbsync.o obj-$(CONFIG_TAU) += temp.o obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o
+ifndef CONFIG_E200 obj-$(CONFIG_FSL_BOOKE) += perfmon_fsl_booke.o +endif +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o ifndef CONFIG_MATH_EMULATION obj-$(CONFIG_8xx) += softemu8xx.o diff --git a/arch/ppc/kernel/cputable.c b/arch/ppc/kernel/cputable.c index 01c226008dbf..50936cda0af9 100644 --- a/arch/ppc/kernel/cputable.c +++ b/arch/ppc/kernel/cputable.c @@ -903,7 +903,30 @@ struct cpu_spec cpu_specs[] = { .dcache_bsize = 32, }, #endif /* CONFIG_44x */ -#ifdef CONFIG_E500 +#ifdef CONFIG_FSL_BOOKE + { /* e200z5 */ + .pvr_mask = 0xfff00000, + .pvr_value = 0x81000000, + .cpu_name = "e200z5", + /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ + .cpu_features = CPU_FTR_USE_TB, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_EFP_SINGLE | + PPC_FEATURE_UNIFIED_CACHE, + .dcache_bsize = 32, + }, + { /* e200z6 */ + .pvr_mask = 0xfff00000, + .pvr_value = 0x81100000, + .cpu_name = "e200z6", + /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ + .cpu_features = CPU_FTR_USE_TB, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_SPE_COMP | + PPC_FEATURE_HAS_EFP_SINGLE | + PPC_FEATURE_UNIFIED_CACHE, + .dcache_bsize = 32, + }, { /* e500 */ .pvr_mask = 0xffff0000, .pvr_value = 0x80200000, diff --git a/arch/ppc/kernel/entry.S b/arch/ppc/kernel/entry.S index 8377b6ca26da..d4df68629cc6 100644 --- a/arch/ppc/kernel/entry.S +++ b/arch/ppc/kernel/entry.S @@ -60,6 +60,11 @@ mcheck_transfer_to_handler: TRANSFER_TO_HANDLER_EXC_LEVEL(MCHECK) b transfer_to_handler_full + .globl debug_transfer_to_handler +debug_transfer_to_handler: + TRANSFER_TO_HANDLER_EXC_LEVEL(DEBUG) + b transfer_to_handler_full + .globl crit_transfer_to_handler crit_transfer_to_handler: TRANSFER_TO_HANDLER_EXC_LEVEL(CRIT) @@ -835,6 +840,10 @@ ret_from_crit_exc: RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, RFCI) #ifdef CONFIG_BOOKE + .globl ret_from_debug_exc +ret_from_debug_exc: + RET_FROM_EXC_LEVEL(SPRN_DSRR0, SPRN_DSRR1, RFDI) + .globl ret_from_mcheck_exc ret_from_mcheck_exc: RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, RFMCI) diff --git a/arch/ppc/kernel/head_booke.h b/arch/ppc/kernel/head_booke.h index 9c50f9d2657c..9342acf12e72 100644 --- a/arch/ppc/kernel/head_booke.h +++ b/arch/ppc/kernel/head_booke.h @@ -49,6 +49,7 @@ * * On 40x critical is the only additional level * On 44x/e500 we have critical and machine check + * On e200 we have critical and debug (machine check occurs via critical) * * Additionally we reserve a SPRG for each priority level so we can free up a * GPR to use as the base for indirect access to the exception stacks. 
This @@ -60,12 +61,16 @@ /* CRIT_SPRG only used in critical exception handling */ #define CRIT_SPRG SPRN_SPRG2 -/* MCHECK_SPRG only used in critical exception handling */ +/* MCHECK_SPRG only used in machine check exception handling */ #define MCHECK_SPRG SPRN_SPRG6W #define MCHECK_STACK_TOP (exception_stack_top - 4096) #define CRIT_STACK_TOP (exception_stack_top) +/* only on e200 for now */ +#define DEBUG_STACK_TOP (exception_stack_top - 4096) +#define DEBUG_SPRG SPRN_SPRG6W + #ifdef CONFIG_SMP #define BOOKE_LOAD_EXC_LEVEL_STACK(level) \ mfspr r8,SPRN_PIR; \ @@ -124,6 +129,8 @@ #define CRITICAL_EXCEPTION_PROLOG \ EXC_LEVEL_EXCEPTION_PROLOG(CRIT, SPRN_CSRR0, SPRN_CSRR1) +#define DEBUG_EXCEPTION_PROLOG \ + EXC_LEVEL_EXCEPTION_PROLOG(DEBUG, SPRN_DSRR0, SPRN_DSRR1) #define MCHECK_EXCEPTION_PROLOG \ EXC_LEVEL_EXCEPTION_PROLOG(MCHECK, SPRN_MCSRR0, SPRN_MCSRR1) @@ -205,6 +212,60 @@ label: * save (and later restore) the MSR via SPRN_CSRR1, which will still have * the MSR_DE bit set. */ +#ifdef CONFIG_E200 +#define DEBUG_EXCEPTION \ + START_EXCEPTION(Debug); \ + DEBUG_EXCEPTION_PROLOG; \ + \ + /* \ + * If there is a single step or branch-taken exception in an \ + * exception entry sequence, it was probably meant to apply to \ + * the code where the exception occurred (since exception entry \ + * doesn't turn off DE automatically). We simulate the effect \ + * of turning off DE on entry to an exception handler by turning \ + * off DE in the CSRR1 value and clearing the debug status. \ + */ \ + mfspr r10,SPRN_DBSR; /* check single-step/branch taken */ \ + andis. r10,r10,DBSR_IC@h; \ + beq+ 2f; \ + \ + lis r10,KERNELBASE@h; /* check if exception in vectors */ \ + ori r10,r10,KERNELBASE@l; \ + cmplw r12,r10; \ + blt+ 2f; /* addr below exception vectors */ \ + \ + lis r10,Debug@h; \ + ori r10,r10,Debug@l; \ + cmplw r12,r10; \ + bgt+ 2f; /* addr above exception vectors */ \ + \ + /* here it looks like we got an inappropriate debug exception. */ \ +1: rlwinm r9,r9,0,~MSR_DE; /* clear DE in the CDRR1 value */ \ + lis r10,DBSR_IC@h; /* clear the IC event */ \ + mtspr SPRN_DBSR,r10; \ + /* restore state and get out */ \ + lwz r10,_CCR(r11); \ + lwz r0,GPR0(r11); \ + lwz r1,GPR1(r11); \ + mtcrf 0x80,r10; \ + mtspr SPRN_DSRR0,r12; \ + mtspr SPRN_DSRR1,r9; \ + lwz r9,GPR9(r11); \ + lwz r12,GPR12(r11); \ + mtspr DEBUG_SPRG,r8; \ + BOOKE_LOAD_EXC_LEVEL_STACK(DEBUG); /* r8 points to the debug stack */ \ + lwz r10,GPR10-INT_FRAME_SIZE(r8); \ + lwz r11,GPR11-INT_FRAME_SIZE(r8); \ + mfspr r8,DEBUG_SPRG; \ + \ + RFDI; \ + b .; \ + \ + /* continue normal handling for a critical exception... 
*/ \ +2: mfspr r4,SPRN_DBSR; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc) +#else #define DEBUG_EXCEPTION \ START_EXCEPTION(Debug); \ CRITICAL_EXCEPTION_PROLOG; \ @@ -257,6 +318,7 @@ label: 2: mfspr r4,SPRN_DBSR; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, crit_transfer_to_handler, ret_from_crit_exc) +#endif #define INSTRUCTION_STORAGE_EXCEPTION \ START_EXCEPTION(InstructionStorage) \ diff --git a/arch/ppc/kernel/head_fsl_booke.S b/arch/ppc/kernel/head_fsl_booke.S index ce36e88ba627..eb804b7a3cb2 100644 --- a/arch/ppc/kernel/head_fsl_booke.S +++ b/arch/ppc/kernel/head_fsl_booke.S @@ -102,6 +102,7 @@ invstr: mflr r6 /* Make it accessible */ or r7,r7,r4 mtspr SPRN_MAS6,r7 tlbsx 0,r6 /* search MSR[IS], SPID=PID0 */ +#ifndef CONFIG_E200 mfspr r7,SPRN_MAS1 andis. r7,r7,MAS1_VALID@h bne match_TLB @@ -118,6 +119,7 @@ invstr: mflr r6 /* Make it accessible */ or r7,r7,r4 mtspr SPRN_MAS6,r7 tlbsx 0,r6 /* Fall through, we had to match */ +#endif match_TLB: mfspr r7,SPRN_MAS0 rlwinm r3,r7,16,20,31 /* Extract MAS0(Entry) */ @@ -196,8 +198,10 @@ skpinv: addi r6,r6,1 /* Increment */ /* 4. Clear out PIDs & Search info */ li r6,0 mtspr SPRN_PID0,r6 +#ifndef CONFIG_E200 mtspr SPRN_PID1,r6 mtspr SPRN_PID2,r6 +#endif mtspr SPRN_MAS6,r6 /* 5. Invalidate mapping we started in */ @@ -277,7 +281,9 @@ skpinv: addi r6,r6,1 /* Increment */ SET_IVOR(32, SPEUnavailable); SET_IVOR(33, SPEFloatingPointData); SET_IVOR(34, SPEFloatingPointRound); +#ifndef CONFIG_E200 SET_IVOR(35, PerformanceMonitor); +#endif /* Establish the interrupt vector base */ lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */ @@ -285,6 +291,9 @@ skpinv: addi r6,r6,1 /* Increment */ /* Setup the defaults for TLB entries */ li r2,(MAS4_TSIZED(BOOKE_PAGESZ_4K))@l +#ifdef CONFIG_E200 + oris r2,r2,MAS4_TLBSELD(1)@h +#endif mtspr SPRN_MAS4, r2 #if 0 @@ -293,6 +302,12 @@ skpinv: addi r6,r6,1 /* Increment */ oris r2,r2,HID0_DOZE@h mtspr SPRN_HID0, r2 #endif +#ifdef CONFIG_E200 + /* enable dedicated debug exception handling resources (Debug APU) */ + mfspr r2,SPRN_HID0 + ori r2,r2,HID0_DAPUEN@l + mtspr SPRN_HID0,r2 +#endif #if !defined(CONFIG_BDI_SWITCH) /* @@ -414,7 +429,12 @@ interrupt_base: CRITICAL_EXCEPTION(0x0100, CriticalInput, UnknownException) /* Machine Check Interrupt */ +#ifdef CONFIG_E200 + /* no RFMCI, MCSRRs on E200 */ + CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException) +#else MCHECK_EXCEPTION(0x0200, MachineCheck, MachineCheckException) +#endif /* Data Storage Interrupt */ START_EXCEPTION(DataStorage) @@ -520,8 +540,13 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else +#ifdef CONFIG_E200 + /* E200 treats 'normal' floating point instructions as FP Unavail exception */ + EXCEPTION(0x0800, FloatingPointUnavailable, ProgramCheckException, EXC_XFER_EE) +#else EXCEPTION(0x0800, FloatingPointUnavailable, UnknownException, EXC_XFER_EE) #endif +#endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) @@ -691,6 +716,7 @@ interrupt_base: /* * Local functions */ + /* * Data TLB exceptions will bail out to this point * if they can't resolve the lightweight TLB fault. 
@@ -761,6 +787,31 @@ END_FTR_SECTION_IFSET(CPU_FTR_BIG_PHYS) 2: rlwimi r11, r12, 0, 20, 31 /* Extract RPN from PTE and merge with perms */ mtspr SPRN_MAS3, r11 #endif +#ifdef CONFIG_E200 + /* Round robin TLB1 entries assignment */ + mfspr r12, SPRN_MAS0 + + /* Extract TLB1CFG(NENTRY) */ + mfspr r11, SPRN_TLB1CFG + andi. r11, r11, 0xfff + + /* Extract MAS0(NV) */ + andi. r13, r12, 0xfff + addi r13, r13, 1 + cmpw 0, r13, r11 + addi r12, r12, 1 + + /* check if we need to wrap */ + blt 7f + + /* wrap back to first free tlbcam entry */ + lis r13, tlbcam_index@ha + lwz r13, tlbcam_index@l(r13) + rlwimi r12, r13, 0, 20, 31 +7: + mtspr SPRN_MAS0,r12 +#endif /* CONFIG_E200 */ + tlbwe /* Done...restore registers and get out of here. */ diff --git a/arch/ppc/kernel/machine_kexec.c b/arch/ppc/kernel/machine_kexec.c new file mode 100644 index 000000000000..84d65a87191e --- /dev/null +++ b/arch/ppc/kernel/machine_kexec.c @@ -0,0 +1,118 @@ +/* + * machine_kexec.c - handle transition of Linux booting another kernel + * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com> + * + * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/mm.h> +#include <linux/kexec.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/hw_irq.h> +#include <asm/cacheflush.h> +#include <asm/machdep.h> + +typedef NORET_TYPE void (*relocate_new_kernel_t)( + unsigned long indirection_page, + unsigned long reboot_code_buffer, + unsigned long start_address) ATTRIB_NORET; + +const extern unsigned char relocate_new_kernel[]; +const extern unsigned int relocate_new_kernel_size; + +void machine_shutdown(void) +{ + if (ppc_md.machine_shutdown) + ppc_md.machine_shutdown(); +} + +void machine_crash_shutdown(struct pt_regs *regs) +{ + if (ppc_md.machine_crash_shutdown) + ppc_md.machine_crash_shutdown(); +} + +/* + * Do what every setup is needed on image and the + * reboot code buffer to allow us to avoid allocations + * later. + */ +int machine_kexec_prepare(struct kimage *image) +{ + if (ppc_md.machine_kexec_prepare) + return ppc_md.machine_kexec_prepare(image); + /* + * Fail if platform doesn't provide its own machine_kexec_prepare + * implementation. + */ + return -ENOSYS; +} + +void machine_kexec_cleanup(struct kimage *image) +{ + if (ppc_md.machine_kexec_cleanup) + ppc_md.machine_kexec_cleanup(image); +} + +/* + * Do not allocate memory (or fail in any way) in machine_kexec(). + * We are past the point of no return, committed to rebooting now. + */ +NORET_TYPE void machine_kexec(struct kimage *image) +{ + if (ppc_md.machine_kexec) + ppc_md.machine_kexec(image); + else { + /* + * Fall back to normal restart if platform doesn't provide + * its own kexec function, and user insist to kexec... + */ + machine_restart(NULL); + } + for(;;); +} + +/* + * This is a generic machine_kexec function suitable at least for + * non-OpenFirmware embedded platforms. + * It merely copies the image relocation code to the control page and + * jumps to it. + * A platform specific function may just call this one. 
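As a rough sketch of how a board port might use this helper (the myboard_* names below are hypothetical; only ppc_md, struct kimage and machine_kexec_simple come from the code in this patch), a platform can supply its own prepare hook and point the kexec entry at the generic copy-and-jump routine defined just below:

#include <linux/init.h>
#include <linux/kexec.h>
#include <asm/machdep.h>

extern void machine_kexec_simple(struct kimage *image);

static int myboard_kexec_prepare(struct kimage *image)
{
        /* reject images this board cannot relocate; 0 means "accepted" */
        return 0;
}

void __init myboard_setup(void)
{
        ppc_md.machine_kexec_prepare = myboard_kexec_prepare;
        ppc_md.machine_kexec = machine_kexec_simple;
}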
+ */ +void machine_kexec_simple(struct kimage *image) +{ + unsigned long page_list; + unsigned long reboot_code_buffer, reboot_code_buffer_phys; + relocate_new_kernel_t rnk; + + /* Interrupts aren't acceptable while we reboot */ + local_irq_disable(); + + page_list = image->head; + + /* we need both effective and real address here */ + reboot_code_buffer = + (unsigned long)page_address(image->control_code_page); + reboot_code_buffer_phys = virt_to_phys((void *)reboot_code_buffer); + + /* copy our kernel relocation code to the control code page */ + memcpy((void *)reboot_code_buffer, relocate_new_kernel, + relocate_new_kernel_size); + + flush_icache_range(reboot_code_buffer, + reboot_code_buffer + KEXEC_CONTROL_CODE_SIZE); + printk(KERN_INFO "Bye!\n"); + + /* now call it */ + rnk = (relocate_new_kernel_t) reboot_code_buffer; + (*rnk)(page_list, reboot_code_buffer_phys, image->start); +} + diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S index 7329ef177a18..b6a63a49a232 100644 --- a/arch/ppc/kernel/misc.S +++ b/arch/ppc/kernel/misc.S @@ -593,6 +593,14 @@ _GLOBAL(flush_instruction_cache) iccci 0,r3 #endif #elif CONFIG_FSL_BOOKE +BEGIN_FTR_SECTION + mfspr r3,SPRN_L1CSR0 + ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC + /* msync; isync recommended here */ + mtspr SPRN_L1CSR0,r3 + isync + blr +END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) mfspr r3,SPRN_L1CSR1 ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR mtspr SPRN_L1CSR1,r3 @@ -1436,7 +1444,7 @@ _GLOBAL(sys_call_table) .long sys_mq_timedreceive /* 265 */ .long sys_mq_notify .long sys_mq_getsetattr - .long sys_ni_syscall /* 268 reserved for sys_kexec_load */ + .long sys_kexec_load .long sys_add_key .long sys_request_key /* 270 */ .long sys_keyctl diff --git a/arch/ppc/kernel/perfmon.c b/arch/ppc/kernel/perfmon.c index 918f6b252e45..fa1dad96b830 100644 --- a/arch/ppc/kernel/perfmon.c +++ b/arch/ppc/kernel/perfmon.c @@ -36,7 +36,7 @@ /* A lock to regulate grabbing the interrupt */ DEFINE_SPINLOCK(perfmon_lock); -#ifdef CONFIG_FSL_BOOKE +#if defined (CONFIG_FSL_BOOKE) && !defined (CONFIG_E200) static void dummy_perf(struct pt_regs *regs) { unsigned int pmgc0 = mfpmr(PMRN_PMGC0); diff --git a/arch/ppc/kernel/relocate_kernel.S b/arch/ppc/kernel/relocate_kernel.S new file mode 100644 index 000000000000..7ff69c4af920 --- /dev/null +++ b/arch/ppc/kernel/relocate_kernel.S @@ -0,0 +1,123 @@ +/* + * relocate_kernel.S - put the kernel image in place to boot + * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com> + * + * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <asm/reg.h> +#include <asm/ppc_asm.h> +#include <asm/processor.h> + +#include <asm/kexec.h> + +#define PAGE_SIZE 4096 /* must be same value as in <asm/page.h> */ + + /* + * Must be relocatable PIC code callable as a C function. + */ + .globl relocate_new_kernel +relocate_new_kernel: + /* r3 = page_list */ + /* r4 = reboot_code_buffer */ + /* r5 = start_address */ + + li r0, 0 + + /* + * Set Machine Status Register to a known status, + * switch the MMU off and jump to 1: in a single step. + */ + + mr r8, r0 + ori r8, r8, MSR_RI|MSR_ME + mtspr SRR1, r8 + addi r8, r4, 1f - relocate_new_kernel + mtspr SRR0, r8 + sync + rfi + +1: + /* from this point address translation is turned off */ + /* and interrupts are disabled */ + + /* set a new stack at the bottom of our page... 
*/ + /* (not really needed now) */ + addi r1, r4, KEXEC_CONTROL_CODE_SIZE - 8 /* for LR Save+Back Chain */ + stw r0, 0(r1) + + /* Do the copies */ + li r6, 0 /* checksum */ + mr r0, r3 + b 1f + +0: /* top, read another word for the indirection page */ + lwzu r0, 4(r3) + +1: + /* is it a destination page? (r8) */ + rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */ + beq 2f + + rlwinm r8, r0, 0, 0, 19 /* clear kexec flags, page align */ + b 0b + +2: /* is it an indirection page? (r3) */ + rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */ + beq 2f + + rlwinm r3, r0, 0, 0, 19 /* clear kexec flags, page align */ + subi r3, r3, 4 + b 0b + +2: /* are we done? */ + rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */ + beq 2f + b 3f + +2: /* is it a source page? (r9) */ + rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */ + beq 0b + + rlwinm r9, r0, 0, 0, 19 /* clear kexec flags, page align */ + + li r7, PAGE_SIZE / 4 + mtctr r7 + subi r9, r9, 4 + subi r8, r8, 4 +9: + lwzu r0, 4(r9) /* do the copy */ + xor r6, r6, r0 + stwu r0, 4(r8) + dcbst 0, r8 + sync + icbi 0, r8 + bdnz 9b + + addi r9, r9, 4 + addi r8, r8, 4 + b 0b + +3: + + /* To be certain of avoiding problems with self-modifying code + * execute a serializing instruction here. + */ + isync + sync + + /* jump to the entry point, usually the setup routine */ + mtlr r5 + blrl + +1: b 1b + +relocate_new_kernel_end: + + .globl relocate_new_kernel_size +relocate_new_kernel_size: + .long relocate_new_kernel_end - relocate_new_kernel + diff --git a/arch/ppc/kernel/traps.c b/arch/ppc/kernel/traps.c index 2ca8ecfeefd9..9e6ae5696650 100644 --- a/arch/ppc/kernel/traps.c +++ b/arch/ppc/kernel/traps.c @@ -173,13 +173,13 @@ static inline int check_io_access(struct pt_regs *regs) /* On 4xx, the reason for the machine check or program exception is in the ESR. 
*/ #define get_reason(regs) ((regs)->dsisr) -#ifndef CONFIG_E500 +#ifndef CONFIG_FSL_BOOKE #define get_mc_reason(regs) ((regs)->dsisr) #else #define get_mc_reason(regs) (mfspr(SPRN_MCSR)) #endif #define REASON_FP ESR_FP -#define REASON_ILLEGAL ESR_PIL +#define REASON_ILLEGAL (ESR_PIL | ESR_PUO) #define REASON_PRIVILEGED ESR_PPR #define REASON_TRAP ESR_PTR @@ -302,7 +302,25 @@ void MachineCheckException(struct pt_regs *regs) printk("Bus - Instruction Parity Error\n"); if (reason & MCSR_BUS_RPERR) printk("Bus - Read Parity Error\n"); -#else /* !CONFIG_4xx && !CONFIG_E500 */ +#elif defined (CONFIG_E200) + printk("Machine check in kernel mode.\n"); + printk("Caused by (from MCSR=%lx): ", reason); + + if (reason & MCSR_MCP) + printk("Machine Check Signal\n"); + if (reason & MCSR_CP_PERR) + printk("Cache Push Parity Error\n"); + if (reason & MCSR_CPERR) + printk("Cache Parity Error\n"); + if (reason & MCSR_EXCP_ERR) + printk("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n"); + if (reason & MCSR_BUS_IRERR) + printk("Bus - Read Bus Error on instruction fetch\n"); + if (reason & MCSR_BUS_DRERR) + printk("Bus - Read Bus Error on data load\n"); + if (reason & MCSR_BUS_WRERR) + printk("Bus - Write Bus Error on buffered store or cache line push\n"); +#else /* !CONFIG_4xx && !CONFIG_E500 && !CONFIG_E200 */ printk("Machine check in kernel mode.\n"); printk("Caused by (from SRR1=%lx): ", reason); switch (reason & 0x601F0000) { diff --git a/arch/ppc/mm/44x_mmu.c b/arch/ppc/mm/44x_mmu.c index 72f7c0d1c0ed..3d79ce281b67 100644 --- a/arch/ppc/mm/44x_mmu.c +++ b/arch/ppc/mm/44x_mmu.c @@ -39,7 +39,6 @@ #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/delay.h> -#include <linux/bootmem.h> #include <linux/highmem.h> #include <asm/pgalloc.h> diff --git a/arch/ppc/mm/4xx_mmu.c b/arch/ppc/mm/4xx_mmu.c index a7f616140381..b7bcbc232f39 100644 --- a/arch/ppc/mm/4xx_mmu.c +++ b/arch/ppc/mm/4xx_mmu.c @@ -36,7 +36,6 @@ #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/delay.h> -#include <linux/bootmem.h> #include <linux/highmem.h> #include <asm/pgalloc.h> diff --git a/arch/ppc/mm/fsl_booke_mmu.c b/arch/ppc/mm/fsl_booke_mmu.c index e07990efa046..af9ca0eb6d55 100644 --- a/arch/ppc/mm/fsl_booke_mmu.c +++ b/arch/ppc/mm/fsl_booke_mmu.c @@ -41,7 +41,6 @@ #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/delay.h> -#include <linux/bootmem.h> #include <linux/highmem.h> #include <asm/pgalloc.h> @@ -126,7 +125,7 @@ void settlbcam(int index, unsigned long virt, phys_addr_t phys, flags |= _PAGE_COHERENT; #endif - TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index); + TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1); TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid); TLBCAM[index].MAS2 = virt & PAGE_MASK; diff --git a/arch/ppc/platforms/83xx/mpc834x_sys.c b/arch/ppc/platforms/83xx/mpc834x_sys.c index 37ece1542799..ddd04d4c1ea9 100644 --- a/arch/ppc/platforms/83xx/mpc834x_sys.c +++ b/arch/ppc/platforms/83xx/mpc834x_sys.c @@ -94,20 +94,24 @@ mpc834x_sys_setup_arch(void) /* setup the board related information for the enet controllers */ pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC83xx_TSEC1); - pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC83xx_IRQ_EXT1; - pdata->phyid = 0; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + if (pdata) { + pdata->board_flags = 
FSL_GIANFAR_BRD_HAS_PHY_INTR; + pdata->interruptPHY = MPC83xx_IRQ_EXT1; + pdata->phyid = 0; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + } pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC83xx_TSEC2); - pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC83xx_IRQ_EXT2; - pdata->phyid = 1; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); + if (pdata) { + pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; + pdata->interruptPHY = MPC83xx_IRQ_EXT2; + pdata->phyid = 1; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); + } #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start) diff --git a/arch/ppc/platforms/85xx/mpc8540_ads.c b/arch/ppc/platforms/85xx/mpc8540_ads.c index a2ed611cd936..ddd2e9a5bb12 100644 --- a/arch/ppc/platforms/85xx/mpc8540_ads.c +++ b/arch/ppc/platforms/85xx/mpc8540_ads.c @@ -92,28 +92,34 @@ mpc8540ads_setup_arch(void) /* setup the board related information for the enet controllers */ pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC1); - pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 0; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + if (pdata) { + pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; + pdata->interruptPHY = MPC85xx_IRQ_EXT5; + pdata->phyid = 0; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + } pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC2); - pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 1; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); - - pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_FEC); - pdata->board_flags = 0; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 3; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enet2addr, 6); + if (pdata) { + pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; + pdata->interruptPHY = MPC85xx_IRQ_EXT5; + pdata->phyid = 1; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); + } + + if (pdata) { + pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_FEC); + pdata->board_flags = 0; + pdata->interruptPHY = MPC85xx_IRQ_EXT5; + pdata->phyid = 3; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enet2addr, 6); + } #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start) diff --git a/arch/ppc/platforms/85xx/mpc8560_ads.c b/arch/ppc/platforms/85xx/mpc8560_ads.c index d87dfd5ce0a2..e18380258b68 100644 --- a/arch/ppc/platforms/85xx/mpc8560_ads.c +++ b/arch/ppc/platforms/85xx/mpc8560_ads.c @@ -90,20 +90,24 @@ mpc8560ads_setup_arch(void) /* setup the board related information for the enet controllers */ pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC1); - pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 0; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - 
memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + if (pdata) { + pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; + pdata->interruptPHY = MPC85xx_IRQ_EXT5; + pdata->phyid = 0; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + } pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC2); - pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 1; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); + if (pdata) { + pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; + pdata->interruptPHY = MPC85xx_IRQ_EXT5; + pdata->phyid = 1; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); + } #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start) diff --git a/arch/ppc/platforms/85xx/sbc8560.c b/arch/ppc/platforms/85xx/sbc8560.c index 3dbdd73618eb..165df94d4aa6 100644 --- a/arch/ppc/platforms/85xx/sbc8560.c +++ b/arch/ppc/platforms/85xx/sbc8560.c @@ -129,20 +129,24 @@ sbc8560_setup_arch(void) /* setup the board related information for the enet controllers */ pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC1); - pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT6; - pdata->phyid = 25; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + if (pdata) { + pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; + pdata->interruptPHY = MPC85xx_IRQ_EXT6; + pdata->phyid = 25; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + } pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC2); - pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; - pdata->interruptPHY = MPC85xx_IRQ_EXT7; - pdata->phyid = 26; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); + if (pdata) { + pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; + pdata->interruptPHY = MPC85xx_IRQ_EXT7; + pdata->phyid = 26; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); + } #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start) diff --git a/arch/ppc/platforms/85xx/stx_gp3.c b/arch/ppc/platforms/85xx/stx_gp3.c index 9455bb6b45e9..bb41265cfc85 100644 --- a/arch/ppc/platforms/85xx/stx_gp3.c +++ b/arch/ppc/platforms/85xx/stx_gp3.c @@ -122,19 +122,23 @@ gp3_setup_arch(void) /* setup the board related information for the enet controllers */ pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC1); -/* pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; */ - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 2; - pdata->phy_reg_addr += binfo->bi_immr_base; - memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + if (pdata) { + /* pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; */ + pdata->interruptPHY = MPC85xx_IRQ_EXT5; + pdata->phyid = 2; + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enetaddr, 6); + } pdata = (struct gianfar_platform_data *) ppc_sys_get_pdata(MPC85xx_TSEC2); -/* pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; */ - pdata->interruptPHY = MPC85xx_IRQ_EXT5; - pdata->phyid = 4; - /* fixup phy address */ - pdata->phy_reg_addr += binfo->bi_immr_base; - 
memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); + if (pdata) { + /* pdata->board_flags = FSL_GIANFAR_BRD_HAS_PHY_INTR; */ + pdata->interruptPHY = MPC85xx_IRQ_EXT5; + pdata->phyid = 4; + /* fixup phy address */ + pdata->phy_reg_addr += binfo->bi_immr_base; + memcpy(pdata->mac_addr, binfo->bi_enet1addr, 6); + } #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start) diff --git a/arch/ppc/platforms/chrp_pci.c b/arch/ppc/platforms/chrp_pci.c index 7d0ee308f662..7d3fbb5c5db2 100644 --- a/arch/ppc/platforms/chrp_pci.c +++ b/arch/ppc/platforms/chrp_pci.c @@ -9,7 +9,6 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/ide.h> -#include <linux/bootmem.h> #include <asm/io.h> #include <asm/pgtable.h> diff --git a/arch/ppc/platforms/katana.c b/arch/ppc/platforms/katana.c index eda922ac3167..169dbf6534b9 100644 --- a/arch/ppc/platforms/katana.c +++ b/arch/ppc/platforms/katana.c @@ -27,12 +27,12 @@ #include <linux/root_dev.h> #include <linux/delay.h> #include <linux/seq_file.h> -#include <linux/bootmem.h> #include <linux/mtd/physmap.h> #include <linux/mv643xx.h> #ifdef CONFIG_BOOTIMG #include <linux/bootimg.h> #endif +#include <asm/io.h> #include <asm/page.h> #include <asm/time.h> #include <asm/smp.h> diff --git a/arch/ppc/platforms/pmac_pci.c b/arch/ppc/platforms/pmac_pci.c index f6ff51924061..719fb49fe2bc 100644 --- a/arch/ppc/platforms/pmac_pci.c +++ b/arch/ppc/platforms/pmac_pci.c @@ -17,7 +17,6 @@ #include <linux/delay.h> #include <linux/string.h> #include <linux/init.h> -#include <linux/bootmem.h> #include <asm/sections.h> #include <asm/io.h> diff --git a/arch/ppc/syslib/cpm2_common.c b/arch/ppc/syslib/cpm2_common.c index ea5e77080e8d..4c19a4ac7163 100644 --- a/arch/ppc/syslib/cpm2_common.c +++ b/arch/ppc/syslib/cpm2_common.c @@ -21,8 +21,8 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/interrupt.h> -#include <linux/bootmem.h> #include <linux/module.h> +#include <asm/io.h> #include <asm/irq.h> #include <asm/mpc8260.h> #include <asm/page.h> diff --git a/arch/ppc/syslib/indirect_pci.c b/arch/ppc/syslib/indirect_pci.c index a5a752609e2c..e71488469704 100644 --- a/arch/ppc/syslib/indirect_pci.c +++ b/arch/ppc/syslib/indirect_pci.c @@ -14,7 +14,6 @@ #include <linux/delay.h> #include <linux/string.h> #include <linux/init.h> -#include <linux/bootmem.h> #include <asm/io.h> #include <asm/prom.h> diff --git a/arch/ppc/syslib/mv64x60.c b/arch/ppc/syslib/mv64x60.c index 7b241e7876bd..cc77177fa1c6 100644 --- a/arch/ppc/syslib/mv64x60.c +++ b/arch/ppc/syslib/mv64x60.c @@ -17,7 +17,6 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/string.h> -#include <linux/bootmem.h> #include <linux/spinlock.h> #include <linux/mv643xx.h> diff --git a/arch/ppc/syslib/mv64x60_win.c b/arch/ppc/syslib/mv64x60_win.c index b6f0f5dcf6ee..5b827e2bbe22 100644 --- a/arch/ppc/syslib/mv64x60_win.c +++ b/arch/ppc/syslib/mv64x60_win.c @@ -17,7 +17,6 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/string.h> -#include <linux/bootmem.h> #include <linux/mv643xx.h> #include <asm/byteorder.h> diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index cb27068bfcd4..f804f25232ac 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig @@ -142,6 +142,23 @@ config PPC_SPLPAR processors, that is, which share physical processors between two or more partitions. +config KEXEC + bool "kexec system call (EXPERIMENTAL)" + depends on PPC_MULTIPLATFORM && EXPERIMENTAL + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. 
It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shutdown, so do not be surprised if this code does not + initially work for you. It may help to enable device hotplugging + support. As of this writing the exact hardware interface is + strongly in flux, so no good recommendation can be made. + config IBMVIO depends on PPC_PSERIES || PPC_ISERIES bool @@ -270,26 +287,7 @@ config SCHED_SMT when dealing with POWER5 cpus at a cost of slightly increased overhead in some places. If unsure say N here. -config PREEMPT - bool "Preemptible Kernel" - help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - - Say Y here if you are building a kernel for a desktop, embedded - or real-time system. Say N if you are unsure. - -config PREEMPT_BKL - bool "Preempt The Big Kernel Lock" - depends on PREEMPT - default y - help - This option reduces the latency of the kernel by making the - big kernel lock preemptible. - - Say Y here if you are building a kernel for a desktop system. - Say N if you are unsure. +source "kernel/Kconfig.preempt" config EEH bool "PCI Extended Error Handling (EEH)" if EMBEDDED diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile index dffbfb7ac8d5..d9b2660ef221 100644 --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_PPC_PSERIES) += pSeries_pci.o pSeries_lpar.o pSeries_hvCall.o \ obj-$(CONFIG_PPC_BPA) += bpa_setup.o bpa_iommu.o bpa_nvram.o \ bpa_iic.o spider-pic.o +obj-$(CONFIG_KEXEC) += machine_kexec.o obj-$(CONFIG_EEH) += eeh.o obj-$(CONFIG_PROC_FS) += proc_ppc64.o obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index 02c8f4e3e4bc..675c2708588f 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -1194,7 +1194,7 @@ _GLOBAL(pSeries_secondary_smp_init) bl .__restore_cpu_setup /* Set up a paca value for this processor. Since we have the - * physical cpu id in r3, we need to search the pacas to find + * physical cpu id in r24, we need to search the pacas to find * which logical id maps to our physical one.
*/ LOADADDR(r13, paca) /* Get base vaddr of paca array */ @@ -1207,8 +1207,8 @@ _GLOBAL(pSeries_secondary_smp_init) cmpwi r5,NR_CPUS blt 1b -99: HMT_LOW /* Couldn't find our CPU id */ - b 99b + mr r3,r24 /* not found, copy phys to r3 */ + b .kexec_wait /* next kernel might do better */ 2: mtspr SPRG3,r13 /* Save vaddr of paca in SPRG3 */ /* From now on, r24 is expected to be logica cpuid */ diff --git a/arch/ppc64/kernel/lparcfg.c b/arch/ppc64/kernel/lparcfg.c index 387923fcf9b0..02e96627fa66 100644 --- a/arch/ppc64/kernel/lparcfg.c +++ b/arch/ppc64/kernel/lparcfg.c @@ -34,6 +34,7 @@ #include <asm/system.h> #include <asm/time.h> #include <asm/iSeries/ItExtVpdPanel.h> +#include <asm/prom.h> #define MODULE_VERS "1.6" #define MODULE_NAME "lparcfg" diff --git a/arch/ppc64/kernel/machine_kexec.c b/arch/ppc64/kernel/machine_kexec.c new file mode 100644 index 000000000000..fdb2fc649d72 --- /dev/null +++ b/arch/ppc64/kernel/machine_kexec.c @@ -0,0 +1,302 @@ +/* + * machine_kexec.c - handle transition of Linux booting another kernel + * + * Copyright (C) 2004-2005, IBM Corp. + * + * Created by: Milton D Miller II + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + + +#include <linux/cpumask.h> +#include <linux/kexec.h> +#include <linux/smp.h> +#include <linux/thread_info.h> +#include <linux/errno.h> + +#include <asm/page.h> +#include <asm/current.h> +#include <asm/machdep.h> +#include <asm/cacheflush.h> +#include <asm/paca.h> +#include <asm/mmu.h> +#include <asm/sections.h> /* _end */ +#include <asm/prom.h> + +#define HASH_GROUP_SIZE 0x80 /* size of each hash group, asm/mmu.h */ + +/* Have this around till we move it into crash specific file */ +note_buf_t crash_notes[NR_CPUS]; + +/* Dummy for now. Not sure if we need to have a crash shutdown in here + * and if what it will achieve. Letting it be now to compile the code + * in generic kexec environment + */ +void machine_crash_shutdown(struct pt_regs *regs) +{ + /* do nothing right now */ + /* smp_relase_cpus() if we want smp on panic kernel */ + /* cpu_irq_down to isolate us until we are ready */ +} + +int machine_kexec_prepare(struct kimage *image) +{ + int i; + unsigned long begin, end; /* limits of segment */ + unsigned long low, high; /* limits of blocked memory range */ + struct device_node *node; + unsigned long *basep; + unsigned int *sizep; + + if (!ppc_md.hpte_clear_all) + return -ENOENT; + + /* + * Since we use the kernel fault handlers and paging code to + * handle the virtual mode, we must make sure no destination + * overlaps kernel static data or bss. + */ + for (i = 0; i < image->nr_segments; i++) + if (image->segment[i].mem < __pa(_end)) + return -ETXTBSY; + + /* + * For non-LPAR, we absolutely can not overwrite the mmu hash + * table, since we are still using the bolted entries in it to + * do the copy. Check that here. + * + * It is safe if the end is below the start of the blocked + * region (end <= low), or if the beginning is after the + * end of the blocked region (begin >= high). Use the + * boolean identity !(a || b) === (!a && !b). 
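The same containment test, written out as a stand-alone helper purely for clarity (illustrative only; the patch open-codes it in the loops that follow, once for the hash table and once for the TCE tables):

/* a segment [begin, end) must be refused if it overlaps [low, high) */
static int overlaps_blocked_range(unsigned long begin, unsigned long end,
                                  unsigned long low, unsigned long high)
{
        /* "no overlap" is (end <= low || begin >= high); this is its negation */
        return (begin < high) && (end > low);
}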
+ */ + if (htab_address) { + low = __pa(htab_address); + high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE; + + for (i = 0; i < image->nr_segments; i++) { + begin = image->segment[i].mem; + end = begin + image->segment[i].memsz; + + if ((begin < high) && (end > low)) + return -ETXTBSY; + } + } + + /* We also should not overwrite the tce tables */ + for (node = of_find_node_by_type(NULL, "pci"); node != NULL; + node = of_find_node_by_type(node, "pci")) { + basep = (unsigned long *)get_property(node, "linux,tce-base", + NULL); + sizep = (unsigned int *)get_property(node, "linux,tce-size", + NULL); + if (basep == NULL || sizep == NULL) + continue; + + low = *basep; + high = low + (*sizep); + + for (i = 0; i < image->nr_segments; i++) { + begin = image->segment[i].mem; + end = begin + image->segment[i].memsz; + + if ((begin < high) && (end > low)) + return -ETXTBSY; + } + } + + return 0; +} + +void machine_kexec_cleanup(struct kimage *image) +{ + /* we do nothing in prepare that needs to be undone */ +} + +#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE) + +static void copy_segments(unsigned long ind) +{ + unsigned long entry; + unsigned long *ptr; + void *dest; + void *addr; + + /* + * We rely on kexec_load to create a lists that properly + * initializes these pointers before they are used. + * We will still crash if the list is wrong, but at least + * the compiler will be quiet. + */ + ptr = NULL; + dest = NULL; + + for (entry = ind; !(entry & IND_DONE); entry = *ptr++) { + addr = __va(entry & PAGE_MASK); + + switch (entry & IND_FLAGS) { + case IND_DESTINATION: + dest = addr; + break; + case IND_INDIRECTION: + ptr = addr; + break; + case IND_SOURCE: + copy_page(dest, addr); + dest += PAGE_SIZE; + } + } +} + +void kexec_copy_flush(struct kimage *image) +{ + long i, nr_segments = image->nr_segments; + struct kexec_segment ranges[KEXEC_SEGMENT_MAX]; + + /* save the ranges on the stack to efficiently flush the icache */ + memcpy(ranges, image->segment, sizeof(ranges)); + + /* + * After this call we may not use anything allocated in dynamic + * memory, including *image. + * + * Only globals and the stack are allowed. + */ + copy_segments(image->head); + + /* + * we need to clear the icache for all dest pages sometime, + * including ones that were in place on the original copy + */ + for (i = 0; i < nr_segments; i++) + flush_icache_range(ranges[i].mem + KERNELBASE, + ranges[i].mem + KERNELBASE + + ranges[i].memsz); +} + +#ifdef CONFIG_SMP + +/* FIXME: we should schedule this function to be called on all cpus based + * on calling the interrupts, but we would like to call it off irq level + * so that the interrupt controller is clean. + */ +void kexec_smp_down(void *arg) +{ + if (ppc_md.cpu_irq_down) + ppc_md.cpu_irq_down(); + + local_irq_disable(); + kexec_smp_wait(); + /* NOTREACHED */ +} + +static void kexec_prepare_cpus(void) +{ + int my_cpu, i, notified=-1; + + smp_call_function(kexec_smp_down, NULL, 0, /* wait */0); + my_cpu = get_cpu(); + + /* check the others cpus are now down (via paca hw cpu id == -1) */ + for (i=0; i < NR_CPUS; i++) { + if (i == my_cpu) + continue; + + while (paca[i].hw_cpu_id != -1) { + if (!cpu_possible(i)) { + printk("kexec: cpu %d hw_cpu_id %d is not" + " possible, ignoring\n", + i, paca[i].hw_cpu_id); + break; + } + if (!cpu_online(i)) { + /* Fixme: this can be spinning in + * pSeries_secondary_wait with a paca + * waiting for it to go online. 
+ */ + printk("kexec: cpu %d hw_cpu_id %d is not" + " online, ignoring\n", + i, paca[i].hw_cpu_id); + break; + } + if (i != notified) { + printk( "kexec: waiting for cpu %d (physical" + " %d) to go down\n", + i, paca[i].hw_cpu_id); + notified = i; + } + } + } + + /* after we tell the others to go down */ + if (ppc_md.cpu_irq_down) + ppc_md.cpu_irq_down(); + + put_cpu(); + + local_irq_disable(); +} + +#else /* ! SMP */ + +static void kexec_prepare_cpus(void) +{ + /* + * move the secondarys to us so that we can copy + * the new kernel 0-0x100 safely + * + * do this if kexec in setup.c ? + */ + smp_relase_cpus(); + if (ppc_md.cpu_irq_down) + ppc_md.cpu_irq_down(); + local_irq_disable(); +} + +#endif /* SMP */ + +/* + * kexec thread structure and stack. + * + * We need to make sure that this is 16384-byte aligned due to the + * way process stacks are handled. It also must be statically allocated + * or allocated as part of the kimage, because everything else may be + * overwritten when we copy the kexec image. We piggyback on the + * "init_task" linker section here to statically allocate a stack. + * + * We could use a smaller stack if we don't care about anything using + * current, but that audit has not been performed. + */ +union thread_union kexec_stack + __attribute__((__section__(".data.init_task"))) = { }; + +/* Our assembly helper, in kexec_stub.S */ +extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start, + void *image, void *control, + void (*clear_all)(void)) ATTRIB_NORET; + +/* too late to fail here */ +void machine_kexec(struct kimage *image) +{ + + /* prepare control code if any */ + + /* shutdown other cpus into our wait loop and quiesce interrupts */ + kexec_prepare_cpus(); + + /* switch to a staticly allocated stack. Based on irq stack code. + * XXX: the task struct will likely be invalid once we do the copy! + */ + kexec_stack.thread_info.task = current_thread_info()->task; + kexec_stack.thread_info.flags = 0; + + /* Some things are best done in assembly. Finding globals with + * a toc is easier in C, so pass in what we can. + */ + kexec_sequence(&kexec_stack, image->start, image, + page_address(image->control_code_page), + ppc_md.hpte_clear_all); + /* NOTREACHED */ +} diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S index e3c73b3425dc..f3dea0c5a88c 100644 --- a/arch/ppc64/kernel/misc.S +++ b/arch/ppc64/kernel/misc.S @@ -680,6 +680,177 @@ _GLOBAL(kernel_thread) ld r30,-16(r1) blr +/* kexec_wait(phys_cpu) + * + * wait for the flag to change, indicating this kernel is going away but + * the slave code for the next one is at addresses 0 to 100. + * + * This is used by all slaves. + * + * Physical (hardware) cpu id should be in r3. 
+ */ +_GLOBAL(kexec_wait) + bl 1f +1: mflr r5 + addi r5,r5,kexec_flag-1b + +99: HMT_LOW +#ifdef CONFIG_KEXEC /* use no memory without kexec */ + lwz r4,0(r5) + cmpwi 0,r4,0 + bnea 0x60 +#endif + b 99b + +/* this can be in text because we won't change it until we are + * running in real anyways + */ +kexec_flag: + .long 0 + + +#ifdef CONFIG_KEXEC + +/* kexec_smp_wait(void) + * + * call with interrupts off + * note: this is a terminal routine, it does not save lr + * + * get phys id from paca + * set paca id to -1 to say we got here + * switch to real mode + * join other cpus in kexec_wait(phys_id) + */ +_GLOBAL(kexec_smp_wait) + lhz r3,PACAHWCPUID(r13) + li r4,-1 + sth r4,PACAHWCPUID(r13) /* let others know we left */ + bl real_mode + b .kexec_wait + +/* + * switch to real mode (turn mmu off) + * we use the early kernel trick that the hardware ignores bits + * 0 and 1 (big endian) of the effective address in real mode + * + * don't overwrite r3 here, it is live for kexec_wait above. + */ +real_mode: /* assume normal blr return */ +1: li r9,MSR_RI + li r10,MSR_DR|MSR_IR + mflr r11 /* return address to SRR0 */ + mfmsr r12 + andc r9,r12,r9 + andc r10,r12,r10 + + mtmsrd r9,1 + mtspr SPRN_SRR1,r10 + mtspr SPRN_SRR0,r11 + rfid + + +/* + * kexec_sequence(newstack, start, image, control, clear_all()) + * + * does the grungy work with stack switching and real mode switches + * also does simple calls to other code + */ + +_GLOBAL(kexec_sequence) + mflr r0 + std r0,16(r1) + + /* switch stacks to newstack -- &kexec_stack.stack */ + stdu r1,THREAD_SIZE-112(r3) + mr r1,r3 + + li r0,0 + std r0,16(r1) + + /* save regs for local vars on new stack. + * yes, we won't go back, but ... + */ + std r31,-8(r1) + std r30,-16(r1) + std r29,-24(r1) + std r28,-32(r1) + std r27,-40(r1) + std r26,-48(r1) + std r25,-56(r1) + + stdu r1,-112-64(r1) + + /* save args into preserved regs */ + mr r31,r3 /* newstack (both) */ + mr r30,r4 /* start (real) */ + mr r29,r5 /* image (virt) */ + mr r28,r6 /* control, unused */ + mr r27,r7 /* clear_all() fn desc */ + mr r26,r8 /* spare */ + lhz r25,PACAHWCPUID(r13) /* get our phys cpu from paca */ + + /* disable interrupts, we are overwriting kernel data next */ + mfmsr r3 + rlwinm r3,r3,0,17,15 + mtmsrd r3,1 + + /* copy dest pages, flush whole dest image */ + mr r3,r29 + bl .kexec_copy_flush /* (image) */ + + /* turn off mmu */ + bl real_mode + + /* clear out hardware hash page table and tlb */ + ld r5,0(r27) /* deref function descriptor */ + mtctr r5 + bctrl /* ppc_md.hash_clear_all(void); */ + +/* + * kexec image calling is: + * the first 0x100 bytes of the entry point are copied to 0 + * + * all slaves branch to slave = 0x60 (absolute) + * slave(phys_cpu_id); + * + * master goes to start = entry point + * start(phys_cpu_id, start, 0); + * + * + * a wrapper is needed to call existing kernels, here is an approximate + * description of one method: + * + * v2: (2.6.10) + * start will be near the boot_block (maybe 0x100 bytes before it?) + * it will have a 0x60, which will b to boot_block, where it will wait + * and 0 will store phys into struct boot-block and load r3 from there, + * copy kernel 0-0x100 and tell slaves to back down to 0x60 again + * + * v1: (2.6.9) + * boot block will have all cpus scanning device tree to see if they + * are the boot cpu ????? + * other device tree differences (prop sizes, va vs pa, etc)... 
+ */ + + /* copy 0x100 bytes starting at start to 0 */ + li r3,0 + mr r4,r30 + li r5,0x100 + li r6,0 + bl .copy_and_flush /* (dest, src, copy limit, start offset) */ +1: /* assume normal blr return */ + + /* release other cpus to the new kernel secondary start at 0x60 */ + mflr r5 + li r6,1 + stw r6,kexec_flag-1b(5) + mr r3,r25 # my phys cpu + mr r4,r30 # start, aka phys mem offset + mtlr 4 + li r5,0 + blr /* image->start(physid, image->start, 0); */ +#endif /* CONFIG_KEXEC */ + /* Why isn't this a) automatic, b) written in 'C'? */ .balign 8 _GLOBAL(sys_call_table32) @@ -951,7 +1122,7 @@ _GLOBAL(sys_call_table32) .llong .compat_sys_mq_timedreceive /* 265 */ .llong .compat_sys_mq_notify .llong .compat_sys_mq_getsetattr - .llong .sys_ni_syscall /* 268 reserved for sys_kexec_load */ + .llong .compat_sys_kexec_load .llong .sys32_add_key .llong .sys32_request_key .llong .compat_sys_keyctl @@ -1227,7 +1398,7 @@ _GLOBAL(sys_call_table) .llong .sys_mq_timedreceive /* 265 */ .llong .sys_mq_notify .llong .sys_mq_getsetattr - .llong .sys_ni_syscall /* 268 reserved for sys_kexec_load */ + .llong .sys_kexec_load .llong .sys_add_key .llong .sys_request_key /* 270 */ .llong .sys_keyctl diff --git a/arch/ppc64/kernel/mpic.c b/arch/ppc64/kernel/mpic.c index 593ea5b82afa..e8fbab1df37f 100644 --- a/arch/ppc64/kernel/mpic.c +++ b/arch/ppc64/kernel/mpic.c @@ -792,6 +792,35 @@ void mpic_setup_this_cpu(void) #endif /* CONFIG_SMP */ } +/* + * XXX: someone who knows mpic should check this. + * do we need to eoi the ipi here (see xics comments)? + * or can we reset the mpic in the new kernel? + */ +void mpic_teardown_this_cpu(void) +{ + struct mpic *mpic = mpic_primary; + unsigned long flags; + u32 msk = 1 << hard_smp_processor_id(); + unsigned int i; + + BUG_ON(mpic == NULL); + + DBG("%s: teardown_this_cpu(%d)\n", mpic->name, hard_smp_processor_id()); + spin_lock_irqsave(&mpic_lock, flags); + + /* let the mpic know we don't want intrs. */ + for (i = 0; i < mpic->num_sources ; i++) + mpic_irq_write(i, MPIC_IRQ_DESTINATION, + mpic_irq_read(i, MPIC_IRQ_DESTINATION) & ~msk); + + /* Set current processor priority to max */ + mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0xf); + + spin_unlock_irqrestore(&mpic_lock, flags); +} + + void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask) { struct mpic *mpic = mpic_primary; diff --git a/arch/ppc64/kernel/mpic.h b/arch/ppc64/kernel/mpic.h index 63e177143eac..99fbbc9a084c 100644 --- a/arch/ppc64/kernel/mpic.h +++ b/arch/ppc64/kernel/mpic.h @@ -255,6 +255,9 @@ extern unsigned int mpic_irq_get_priority(unsigned int irq); /* Setup a non-boot CPU */ extern void mpic_setup_this_cpu(void); +/* Clean up for kexec (or cpu offline or ...) 
*/ +extern void mpic_teardown_this_cpu(void); + /* Request IPIs on primary mpic */ extern void mpic_request_ipis(void); diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index f2b41243342c..44d9af72d225 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -187,14 +187,16 @@ static void __init pSeries_setup_arch(void) { /* Fixup ppc_md depending on the type of interrupt controller */ if (ppc64_interrupt_controller == IC_OPEN_PIC) { - ppc_md.init_IRQ = pSeries_init_mpic; + ppc_md.init_IRQ = pSeries_init_mpic; ppc_md.get_irq = mpic_get_irq; + ppc_md.cpu_irq_down = mpic_teardown_this_cpu; /* Allocate the mpic now, so that find_and_init_phbs() can * fill the ISUs */ pSeries_setup_mpic(); } else { ppc_md.init_IRQ = xics_init_IRQ; ppc_md.get_irq = xics_get_irq; + ppc_md.cpu_irq_down = xics_teardown_cpu; } #ifdef CONFIG_SMP diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c index 30154140f7e2..62c55a123560 100644 --- a/arch/ppc64/kernel/pSeries_smp.c +++ b/arch/ppc64/kernel/pSeries_smp.c @@ -93,10 +93,13 @@ static int query_cpu_stopped(unsigned int pcpu) int pSeries_cpu_disable(void) { + int cpu = smp_processor_id(); + + cpu_clear(cpu, cpu_online_map); systemcfg->processorCount--; /*fix boot_cpuid here*/ - if (smp_processor_id() == boot_cpuid) + if (cpu == boot_cpuid) boot_cpuid = any_online_cpu(cpu_online_map); /* FIXME: abstract this to not be platform specific later on */ diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c index 0a47a5ef428d..d5e4866e9ac2 100644 --- a/arch/ppc64/kernel/setup.c +++ b/arch/ppc64/kernel/setup.c @@ -677,11 +677,16 @@ void __init setup_system(void) DBG(" <- setup_system()\n"); } - -void machine_restart(char *cmd) +/* also used by kexec */ +void machine_shutdown(void) { if (ppc_md.nvram_sync) ppc_md.nvram_sync(); +} + +void machine_restart(char *cmd) +{ + machine_shutdown(); ppc_md.restart(cmd); #ifdef CONFIG_SMP smp_send_stop(); @@ -690,13 +695,11 @@ void machine_restart(char *cmd) local_irq_disable(); while (1) ; } - EXPORT_SYMBOL(machine_restart); - + void machine_power_off(void) { - if (ppc_md.nvram_sync) - ppc_md.nvram_sync(); + machine_shutdown(); ppc_md.power_off(); #ifdef CONFIG_SMP smp_send_stop(); @@ -705,13 +708,11 @@ void machine_power_off(void) local_irq_disable(); while (1) ; } - EXPORT_SYMBOL(machine_power_off); - + void machine_halt(void) { - if (ppc_md.nvram_sync) - ppc_md.nvram_sync(); + machine_shutdown(); ppc_md.halt(); #ifdef CONFIG_SMP smp_send_stop(); @@ -720,7 +721,6 @@ void machine_halt(void) local_irq_disable(); while (1) ; } - EXPORT_SYMBOL(machine_halt); static int ppc64_panic_event(struct notifier_block *this, diff --git a/arch/ppc64/kernel/xics.c b/arch/ppc64/kernel/xics.c index 879f39b90a33..677c4450984a 100644 --- a/arch/ppc64/kernel/xics.c +++ b/arch/ppc64/kernel/xics.c @@ -647,6 +647,31 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) } } +void xics_teardown_cpu(void) +{ + int cpu = smp_processor_id(); + int status; + + ops->cppr_info(cpu, 0x00); + iosync(); + + /* + * we need to EOI the IPI if we got here from kexec down IPI + * + * xics doesn't care if we duplicate an EOI as long as we + * don't EOI and raise priority. + * + * probably need to check all the other interrupts too + * should we be flagging idle loop instead? + * or creating some task to be scheduled? 
+ */ + ops->xirr_info_set(cpu, XICS_IPI); + + status = rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE, + (1UL << interrupt_server_size) - 1 - default_distrib_server, 0); + WARN_ON(status != 0); +} + #ifdef CONFIG_HOTPLUG_CPU /* Interrupts are disabled. */ diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c index 52b6b9305341..4fec05817d66 100644 --- a/arch/ppc64/mm/hash_native.c +++ b/arch/ppc64/mm/hash_native.c @@ -304,6 +304,50 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va, local_irq_restore(flags); } +/* + * clear all mappings on kexec. All cpus are in real mode (or they will + * be when they isi), and we are the only one left. We rely on our kernel + * mapping being 0xC0's and the hardware ignoring those two real bits. + * + * TODO: add batching support when enabled. remember, no dynamic memory here, + * athough there is the control page available... + */ +static void native_hpte_clear(void) +{ + unsigned long slot, slots, flags; + HPTE *hptep = htab_address; + Hpte_dword0 dw0; + unsigned long pteg_count; + + pteg_count = htab_hash_mask + 1; + + local_irq_save(flags); + + /* we take the tlbie lock and hold it. Some hardware will + * deadlock if we try to tlbie from two processors at once. + */ + spin_lock(&native_tlbie_lock); + + slots = pteg_count * HPTES_PER_GROUP; + + for (slot = 0; slot < slots; slot++, hptep++) { + /* + * we could lock the pte here, but we are the only cpu + * running, right? and for crash dump, we probably + * don't want to wait for a maybe bad cpu. + */ + dw0 = hptep->dw0.dw0; + + if (dw0.v) { + hptep->dw0.dword0 = 0; + tlbie(slot2va(dw0.avpn, dw0.l, dw0.h, slot), dw0.l); + } + } + + spin_unlock(&native_tlbie_lock); + local_irq_restore(flags); +} + static void native_flush_hash_range(unsigned long context, unsigned long number, int local) { @@ -415,7 +459,8 @@ void hpte_init_native(void) ppc_md.hpte_updatepp = native_hpte_updatepp; ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp; ppc_md.hpte_insert = native_hpte_insert; - ppc_md.hpte_remove = native_hpte_remove; + ppc_md.hpte_remove = native_hpte_remove; + ppc_md.hpte_clear_all = native_hpte_clear; if (tlb_batching_enabled()) ppc_md.flush_hash_range = native_flush_hash_range; htab_finish_init(); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 32696c1d9280..6600ee87f896 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -455,6 +455,14 @@ config NO_IDLE_HZ_INIT The HZ timer is switched off in idle by default. That means the HZ timer is already disabled at boot time. +config KEXEC + bool "kexec system call (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but is independent of hardware/microcode support. 
+ endmenu config PCMCIA diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 07fd0414a4bf..89850b2c27ea 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -245,6 +245,7 @@ CONFIG_S390_TAPE_BLOCK=y # CONFIG_S390_TAPE_34XX=m # CONFIG_VMLOGRDR is not set +# CONFIG_VMCP is not set # CONFIG_MONREADER is not set # CONFIG_DCSS_SHM is not set diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index b41e0e199a7c..ab1e49d2e518 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -25,6 +25,16 @@ obj-$(CONFIG_ARCH_S390X) += entry64.o reipl64.o obj-$(CONFIG_VIRT_TIMER) += vtime.o +# Kexec part +S390_KEXEC_OBJS := machine_kexec.o crash.o +ifeq ($(CONFIG_ARCH_S390X),y) +S390_KEXEC_OBJS += relocate_kernel64.o +else +S390_KEXEC_OBJS += relocate_kernel.o +endif +obj-$(CONFIG_KEXEC) += $(S390_KEXEC_OBJS) + + # # This is just to get the dependencies... # diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 7a607b1d0380..bf529739c8ab 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1441,3 +1441,11 @@ compat_sys_waitid_wrapper: lgfr %r5,%r5 # int llgtr %r6,%r6 # struct rusage_emu31 * jg compat_sys_waitid + + .globl compat_sys_kexec_load_wrapper +compat_sys_kexec_load_wrapper: + llgfr %r2,%r2 # unsigned long + llgfr %r3,%r3 # unsigned long + llgtr %r4,%r4 # struct kexec_segment * + llgfr %r5,%r5 # unsigned long + jg compat_sys_kexec_load diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 44df8dc07c59..20062145e84e 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -2,7 +2,7 @@ * arch/s390/kernel/cpcmd.c * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright (C) 1999,2005 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Christian Borntraeger (cborntra@de.ibm.com), */ @@ -18,93 +18,114 @@ #include <asm/system.h> static DEFINE_SPINLOCK(cpcmd_lock); -static char cpcmd_buf[240]; +static char cpcmd_buf[241]; /* * the caller of __cpcmd has to ensure that the response buffer is below 2 GB */ -void __cpcmd(char *cmd, char *response, int rlen) +int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) { const int mask = 0x40000000L; unsigned long flags; + int return_code; + int return_len; int cmdlen; spin_lock_irqsave(&cpcmd_lock, flags); cmdlen = strlen(cmd); BUG_ON(cmdlen > 240); - strcpy(cpcmd_buf, cmd); + memcpy(cpcmd_buf, cmd, cmdlen); ASCEBC(cpcmd_buf, cmdlen); if (response != NULL && rlen > 0) { memset(response, 0, rlen); #ifndef CONFIG_ARCH_S390X - asm volatile ("LRA 2,0(%0)\n\t" - "LR 4,%1\n\t" - "O 4,%4\n\t" - "LRA 3,0(%2)\n\t" - "LR 5,%3\n\t" - ".long 0x83240008 # Diagnose X'08'\n\t" - : /* no output */ - : "a" (cpcmd_buf), "d" (cmdlen), - "a" (response), "d" (rlen), "m" (mask) - : "cc", "2", "3", "4", "5" ); + asm volatile ( "lra 2,0(%2)\n" + "lr 4,%3\n" + "o 4,%6\n" + "lra 3,0(%4)\n" + "lr 5,%5\n" + "diag 2,4,0x8\n" + "brc 8, .Litfits\n" + "ar 5, %5\n" + ".Litfits: \n" + "lr %0,4\n" + "lr %1,5\n" + : "=d" (return_code), "=d" (return_len) + : "a" (cpcmd_buf), "d" (cmdlen), + "a" (response), "d" (rlen), "m" (mask) + : "cc", "2", "3", "4", "5" ); #else /* CONFIG_ARCH_S390X */ - asm volatile (" lrag 2,0(%0)\n" - " lgr 4,%1\n" - " o 4,%4\n" - " lrag 3,0(%2)\n" - " lgr 5,%3\n" - " sam31\n" - " .long 0x83240008 # Diagnose X'08'\n" - " sam64" - : /* no output */ - : "a" (cpcmd_buf), "d" (cmdlen), - "a" (response), 
"d" (rlen), "m" (mask) - : "cc", "2", "3", "4", "5" ); + asm volatile ( "lrag 2,0(%2)\n" + "lgr 4,%3\n" + "o 4,%6\n" + "lrag 3,0(%4)\n" + "lgr 5,%5\n" + "sam31\n" + "diag 2,4,0x8\n" + "sam64\n" + "brc 8, .Litfits\n" + "agr 5, %5\n" + ".Litfits: \n" + "lgr %0,4\n" + "lgr %1,5\n" + : "=d" (return_code), "=d" (return_len) + : "a" (cpcmd_buf), "d" (cmdlen), + "a" (response), "d" (rlen), "m" (mask) + : "cc", "2", "3", "4", "5" ); #endif /* CONFIG_ARCH_S390X */ EBCASC(response, rlen); } else { + return_len = 0; #ifndef CONFIG_ARCH_S390X - asm volatile ("LRA 2,0(%0)\n\t" - "LR 3,%1\n\t" - ".long 0x83230008 # Diagnose X'08'\n\t" - : /* no output */ - : "a" (cpcmd_buf), "d" (cmdlen) - : "2", "3" ); + asm volatile ( "lra 2,0(%1)\n" + "lr 3,%2\n" + "diag 2,3,0x8\n" + "lr %0,3\n" + : "=d" (return_code) + : "a" (cpcmd_buf), "d" (cmdlen) + : "2", "3" ); #else /* CONFIG_ARCH_S390X */ - asm volatile (" lrag 2,0(%0)\n" - " lgr 3,%1\n" - " sam31\n" - " .long 0x83230008 # Diagnose X'08'\n" - " sam64" - : /* no output */ - : "a" (cpcmd_buf), "d" (cmdlen) - : "2", "3" ); + asm volatile ( "lrag 2,0(%1)\n" + "lgr 3,%2\n" + "sam31\n" + "diag 2,3,0x8\n" + "sam64\n" + "lgr %0,3\n" + : "=d" (return_code) + : "a" (cpcmd_buf), "d" (cmdlen) + : "2", "3" ); #endif /* CONFIG_ARCH_S390X */ } spin_unlock_irqrestore(&cpcmd_lock, flags); + if (response_code != NULL) + *response_code = return_code; + return return_len; } EXPORT_SYMBOL(__cpcmd); #ifdef CONFIG_ARCH_S390X -void cpcmd(char *cmd, char *response, int rlen) +int cpcmd(const char *cmd, char *response, int rlen, int *response_code) { char *lowbuf; + int len; + if ((rlen == 0) || (response == NULL) || !((unsigned long)response >> 31)) - __cpcmd(cmd, response, rlen); + len = __cpcmd(cmd, response, rlen, response_code); else { lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA); if (!lowbuf) { printk(KERN_WARNING "cpcmd: could not allocate response buffer\n"); - return; + return -ENOMEM; } - __cpcmd(cmd, lowbuf, rlen); + len = __cpcmd(cmd, lowbuf, rlen, response_code); memcpy(response, lowbuf, rlen); kfree(lowbuf); } + return len; } EXPORT_SYMBOL(cpcmd); diff --git a/arch/s390/kernel/crash.c b/arch/s390/kernel/crash.c new file mode 100644 index 000000000000..7bd169c58b0c --- /dev/null +++ b/arch/s390/kernel/crash.c @@ -0,0 +1,17 @@ +/* + * arch/s390/kernel/crash.c + * + * (C) Copyright IBM Corp. 
2005 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + * + */ + +#include <linux/threads.h> +#include <linux/kexec.h> + +note_buf_t crash_notes[NR_CPUS]; + +void machine_crash_shutdown(struct pt_regs *regs) +{ +} diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 91f8ce5543d3..960ba6029c3a 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -19,22 +19,27 @@ #include <linux/sysctl.h> #include <asm/uaccess.h> #include <asm/semaphore.h> - #include <linux/module.h> #include <linux/init.h> +#include <linux/fs.h> +#include <linux/debugfs.h> #include <asm/debug.h> #define DEBUG_PROLOG_ENTRY -1 +#define ALL_AREAS 0 /* copy all debug areas */ +#define NO_AREAS 1 /* copy no debug areas */ + /* typedefs */ typedef struct file_private_info { loff_t offset; /* offset of last read in file */ int act_area; /* number of last formated area */ + int act_page; /* act page in given area */ int act_entry; /* last formated entry (offset */ /* relative to beginning of last */ - /* formated area) */ + /* formated page) */ size_t act_entry_offset; /* up to this offset we copied */ /* in last read the last formated */ /* entry to userland */ @@ -51,8 +56,8 @@ typedef struct * This assumes that all args are converted into longs * on L/390 this is the case for all types of parameter * except of floats, and long long (32 bit) - * - */ + * + */ long args[0]; } debug_sprintf_entry_t; @@ -63,32 +68,38 @@ extern void tod_to_timeval(uint64_t todval, struct timeval *xtime); static int debug_init(void); static ssize_t debug_output(struct file *file, char __user *user_buf, - size_t user_len, loff_t * offset); + size_t user_len, loff_t * offset); static ssize_t debug_input(struct file *file, const char __user *user_buf, - size_t user_len, loff_t * offset); + size_t user_len, loff_t * offset); static int debug_open(struct inode *inode, struct file *file); static int debug_close(struct inode *inode, struct file *file); -static debug_info_t* debug_info_create(char *name, int page_order, int nr_areas, int buf_size); +static debug_info_t* debug_info_create(char *name, int pages_per_area, + int nr_areas, int buf_size); static void debug_info_get(debug_info_t *); static void debug_info_put(debug_info_t *); static int debug_prolog_level_fn(debug_info_t * id, - struct debug_view *view, char *out_buf); + struct debug_view *view, char *out_buf); static int debug_input_level_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t user_buf_size, loff_t * offset); + struct file *file, const char __user *user_buf, + size_t user_buf_size, loff_t * offset); +static int debug_prolog_pages_fn(debug_info_t * id, + struct debug_view *view, char *out_buf); +static int debug_input_pages_fn(debug_info_t * id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_buf_size, loff_t * offset); static int debug_input_flush_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t user_buf_size, loff_t * offset); + struct file *file, const char __user *user_buf, + size_t user_buf_size, loff_t * offset); static int debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, - char *out_buf, const char *in_buf); + char *out_buf, const char *in_buf); static int debug_raw_format_fn(debug_info_t * id, - struct debug_view *view, char *out_buf, - const char *in_buf); + struct debug_view *view, char *out_buf, + const char *in_buf); static int 
debug_raw_header_fn(debug_info_t * id, struct debug_view *view, - int area, debug_entry_t * entry, char *out_buf); + int area, debug_entry_t * entry, char *out_buf); static int debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view, - char *out_buf, debug_sprintf_entry_t *curr_event); + char *out_buf, debug_sprintf_entry_t *curr_event); /* globals */ @@ -119,6 +130,15 @@ struct debug_view debug_level_view = { NULL }; +struct debug_view debug_pages_view = { + "pages", + &debug_prolog_pages_fn, + NULL, + NULL, + &debug_input_pages_fn, + NULL +}; + struct debug_view debug_flush_view = { "flush", NULL, @@ -149,98 +169,161 @@ DECLARE_MUTEX(debug_lock); static int initialized; static struct file_operations debug_file_ops = { - .owner = THIS_MODULE, + .owner = THIS_MODULE, .read = debug_output, - .write = debug_input, + .write = debug_input, .open = debug_open, .release = debug_close, }; -static struct proc_dir_entry *debug_proc_root_entry; +static struct dentry *debug_debugfs_root_entry; /* functions */ /* + * debug_areas_alloc + * - Debug areas are implemented as a threedimensonal array: + * areas[areanumber][pagenumber][pageoffset] + */ + +static debug_entry_t*** +debug_areas_alloc(int pages_per_area, int nr_areas) +{ + debug_entry_t*** areas; + int i,j; + + areas = (debug_entry_t ***) kmalloc(nr_areas * + sizeof(debug_entry_t**), + GFP_KERNEL); + if (!areas) + goto fail_malloc_areas; + for (i = 0; i < nr_areas; i++) { + areas[i] = (debug_entry_t**) kmalloc(pages_per_area * + sizeof(debug_entry_t*),GFP_KERNEL); + if (!areas[i]) { + goto fail_malloc_areas2; + } + for(j = 0; j < pages_per_area; j++) { + areas[i][j] = (debug_entry_t*)kmalloc(PAGE_SIZE, + GFP_KERNEL); + if(!areas[i][j]) { + for(j--; j >=0 ; j--) { + kfree(areas[i][j]); + } + kfree(areas[i]); + goto fail_malloc_areas2; + } else { + memset(areas[i][j],0,PAGE_SIZE); + } + } + } + return areas; + +fail_malloc_areas2: + for(i--; i >= 0; i--){ + for(j=0; j < pages_per_area;j++){ + kfree(areas[i][j]); + } + kfree(areas[i]); + } + kfree(areas); +fail_malloc_areas: + return NULL; + +} + + +/* * debug_info_alloc * - alloc new debug-info */ -static debug_info_t* debug_info_alloc(char *name, int page_order, - int nr_areas, int buf_size) +static debug_info_t* +debug_info_alloc(char *name, int pages_per_area, int nr_areas, int buf_size, + int level, int mode) { debug_info_t* rc; - int i; /* alloc everything */ - rc = (debug_info_t*) kmalloc(sizeof(debug_info_t), GFP_ATOMIC); + rc = (debug_info_t*) kmalloc(sizeof(debug_info_t), GFP_KERNEL); if(!rc) goto fail_malloc_rc; - rc->active_entry = (int*)kmalloc(nr_areas * sizeof(int), GFP_ATOMIC); - if(!rc->active_entry) - goto fail_malloc_active_entry; - memset(rc->active_entry, 0, nr_areas * sizeof(int)); - rc->areas = (debug_entry_t **) kmalloc(nr_areas * - sizeof(debug_entry_t *), - GFP_ATOMIC); - if (!rc->areas) - goto fail_malloc_areas; - for (i = 0; i < nr_areas; i++) { - rc->areas[i] = (debug_entry_t *) __get_free_pages(GFP_ATOMIC, - page_order); - if (!rc->areas[i]) { - for (i--; i >= 0; i--) { - free_pages((unsigned long) rc->areas[i], - page_order); - } - goto fail_malloc_areas2; - } else { - memset(rc->areas[i], 0, PAGE_SIZE << page_order); - } + rc->active_entries = (int*)kmalloc(nr_areas * sizeof(int), GFP_KERNEL); + if(!rc->active_entries) + goto fail_malloc_active_entries; + memset(rc->active_entries, 0, nr_areas * sizeof(int)); + rc->active_pages = (int*)kmalloc(nr_areas * sizeof(int), GFP_KERNEL); + if(!rc->active_pages) + goto fail_malloc_active_pages; + 
memset(rc->active_pages, 0, nr_areas * sizeof(int)); + if((mode == ALL_AREAS) && (pages_per_area != 0)){ + rc->areas = debug_areas_alloc(pages_per_area, nr_areas); + if(!rc->areas) + goto fail_malloc_areas; + } else { + rc->areas = NULL; } /* initialize members */ spin_lock_init(&rc->lock); - rc->page_order = page_order; - rc->nr_areas = nr_areas; - rc->active_area = 0; - rc->level = DEBUG_DEFAULT_LEVEL; - rc->buf_size = buf_size; - rc->entry_size = sizeof(debug_entry_t) + buf_size; - strlcpy(rc->name, name, sizeof(rc->name)); + rc->pages_per_area = pages_per_area; + rc->nr_areas = nr_areas; + rc->active_area = 0; + rc->level = level; + rc->buf_size = buf_size; + rc->entry_size = sizeof(debug_entry_t) + buf_size; + strlcpy(rc->name, name, sizeof(rc->name)-1); memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *)); -#ifdef CONFIG_PROC_FS - memset(rc->proc_entries, 0 ,DEBUG_MAX_VIEWS * - sizeof(struct proc_dir_entry*)); -#endif /* CONFIG_PROC_FS */ + memset(rc->debugfs_entries, 0 ,DEBUG_MAX_VIEWS * + sizeof(struct dentry*)); atomic_set(&(rc->ref_count), 0); return rc; -fail_malloc_areas2: - kfree(rc->areas); fail_malloc_areas: - kfree(rc->active_entry); -fail_malloc_active_entry: + kfree(rc->active_pages); +fail_malloc_active_pages: + kfree(rc->active_entries); +fail_malloc_active_entries: kfree(rc); fail_malloc_rc: return NULL; } /* - * debug_info_free - * - free memory debug-info + * debug_areas_free + * - free all debug areas */ -static void debug_info_free(debug_info_t* db_info){ - int i; +static void +debug_areas_free(debug_info_t* db_info) +{ + int i,j; + + if(!db_info->areas) + return; for (i = 0; i < db_info->nr_areas; i++) { - free_pages((unsigned long) db_info->areas[i], - db_info->page_order); + for(j = 0; j < db_info->pages_per_area; j++) { + kfree(db_info->areas[i][j]); + } + kfree(db_info->areas[i]); } kfree(db_info->areas); - kfree(db_info->active_entry); + db_info->areas = NULL; +} + +/* + * debug_info_free + * - free memory debug-info + */ + +static void +debug_info_free(debug_info_t* db_info){ + debug_areas_free(db_info); + kfree(db_info->active_entries); + kfree(db_info->active_pages); kfree(db_info); } @@ -249,21 +332,22 @@ static void debug_info_free(debug_info_t* db_info){ * - create new debug-info */ -static debug_info_t* debug_info_create(char *name, int page_order, - int nr_areas, int buf_size) +static debug_info_t* +debug_info_create(char *name, int pages_per_area, int nr_areas, int buf_size) { debug_info_t* rc; - rc = debug_info_alloc(name, page_order, nr_areas, buf_size); + rc = debug_info_alloc(name, pages_per_area, nr_areas, buf_size, + DEBUG_DEFAULT_LEVEL, ALL_AREAS); if(!rc) goto out; - - /* create proc rood directory */ - rc->proc_root_entry = proc_mkdir(rc->name, debug_proc_root_entry); + /* create root directory */ + rc->debugfs_root_entry = debugfs_create_dir(rc->name, + debug_debugfs_root_entry); /* append new element to linked list */ - if (debug_area_first == NULL) { + if (!debug_area_first) { /* first element in list */ debug_area_first = rc; rc->prev = NULL; @@ -285,17 +369,21 @@ out: * - copy debug-info */ -static debug_info_t* debug_info_copy(debug_info_t* in) +static debug_info_t* +debug_info_copy(debug_info_t* in, int mode) { - int i; + int i,j; debug_info_t* rc; - rc = debug_info_alloc(in->name, in->page_order, - in->nr_areas, in->buf_size); - if(!rc) + + rc = debug_info_alloc(in->name, in->pages_per_area, in->nr_areas, + in->buf_size, in->level, mode); + if(!rc || (mode == NO_AREAS)) goto out; for(i = 0; i < in->nr_areas; i++){ - 
memcpy(rc->areas[i],in->areas[i], PAGE_SIZE << in->page_order); + for(j = 0; j < in->pages_per_area; j++) { + memcpy(rc->areas[i][j], in->areas[i][j],PAGE_SIZE); + } } out: return rc; @@ -306,7 +394,8 @@ out: * - increments reference count for debug-info */ -static void debug_info_get(debug_info_t * db_info) +static void +debug_info_get(debug_info_t * db_info) { if (db_info) atomic_inc(&db_info->ref_count); @@ -317,29 +406,20 @@ static void debug_info_get(debug_info_t * db_info) * - decreases reference count for debug-info and frees it if necessary */ -static void debug_info_put(debug_info_t *db_info) +static void +debug_info_put(debug_info_t *db_info) { int i; if (!db_info) return; if (atomic_dec_and_test(&db_info->ref_count)) { -#ifdef DEBUG - printk(KERN_INFO "debug: freeing debug area %p (%s)\n", - db_info, db_info->name); -#endif for (i = 0; i < DEBUG_MAX_VIEWS; i++) { - if (db_info->views[i] == NULL) + if (!db_info->views[i]) continue; -#ifdef CONFIG_PROC_FS - remove_proc_entry(db_info->proc_entries[i]->name, - db_info->proc_root_entry); -#endif + debugfs_remove(db_info->debugfs_entries[i]); } -#ifdef CONFIG_PROC_FS - remove_proc_entry(db_info->proc_root_entry->name, - debug_proc_root_entry); -#endif + debugfs_remove(db_info->debugfs_root_entry); if(db_info == debug_area_first) debug_area_first = db_info->next; if(db_info == debug_area_last) @@ -355,9 +435,9 @@ static void debug_info_put(debug_info_t *db_info) * - format one debug entry and return size of formated data */ -static int debug_format_entry(file_private_info_t *p_info) +static int +debug_format_entry(file_private_info_t *p_info) { - debug_info_t *id_org = p_info->debug_info_org; debug_info_t *id_snap = p_info->debug_info_snap; struct debug_view *view = p_info->view; debug_entry_t *act_entry; @@ -365,22 +445,23 @@ static int debug_format_entry(file_private_info_t *p_info) if(p_info->act_entry == DEBUG_PROLOG_ENTRY){ /* print prolog */ if (view->prolog_proc) - len += view->prolog_proc(id_org, view,p_info->temp_buf); + len += view->prolog_proc(id_snap,view,p_info->temp_buf); goto out; } - - act_entry = (debug_entry_t *) ((char*)id_snap->areas[p_info->act_area] + - p_info->act_entry); + if (!id_snap->areas) /* this is true, if we have a prolog only view */ + goto out; /* or if 'pages_per_area' is 0 */ + act_entry = (debug_entry_t *) ((char*)id_snap->areas[p_info->act_area] + [p_info->act_page] + p_info->act_entry); if (act_entry->id.stck == 0LL) goto out; /* empty entry */ if (view->header_proc) - len += view->header_proc(id_org, view, p_info->act_area, + len += view->header_proc(id_snap, view, p_info->act_area, act_entry, p_info->temp_buf + len); if (view->format_proc) - len += view->format_proc(id_org, view, p_info->temp_buf + len, + len += view->format_proc(id_snap, view, p_info->temp_buf + len, DEBUG_DATA(act_entry)); - out: +out: return len; } @@ -389,20 +470,30 @@ static int debug_format_entry(file_private_info_t *p_info) * - goto next entry in p_info */ -extern inline int debug_next_entry(file_private_info_t *p_info) +extern inline int +debug_next_entry(file_private_info_t *p_info) { - debug_info_t *id = p_info->debug_info_snap; + debug_info_t *id; + + id = p_info->debug_info_snap; if(p_info->act_entry == DEBUG_PROLOG_ENTRY){ p_info->act_entry = 0; + p_info->act_page = 0; goto out; } - if ((p_info->act_entry += id->entry_size) - > ((PAGE_SIZE << (id->page_order)) - - id->entry_size)){ - - /* next area */ + if(!id->areas) + return 1; + p_info->act_entry += id->entry_size; + /* switch to next page, if we reached the end 
of the page */ + if (p_info->act_entry > (PAGE_SIZE - id->entry_size)){ + /* next page */ p_info->act_entry = 0; - p_info->act_area++; + p_info->act_page += 1; + if((p_info->act_page % id->pages_per_area) == 0) { + /* next area */ + p_info->act_area++; + p_info->act_page=0; + } if(p_info->act_area >= id->nr_areas) return 1; } @@ -416,13 +507,14 @@ out: * - copies formated debug entries to the user buffer */ -static ssize_t debug_output(struct file *file, /* file descriptor */ - char __user *user_buf, /* user buffer */ - size_t len, /* length of buffer */ - loff_t *offset) /* offset in the file */ +static ssize_t +debug_output(struct file *file, /* file descriptor */ + char __user *user_buf, /* user buffer */ + size_t len, /* length of buffer */ + loff_t *offset) /* offset in the file */ { size_t count = 0; - size_t entry_offset, size = 0; + size_t entry_offset; file_private_info_t *p_info; p_info = ((file_private_info_t *) file->private_data); @@ -430,27 +522,33 @@ static ssize_t debug_output(struct file *file, /* file descriptor */ return -EPIPE; if(p_info->act_area >= p_info->debug_info_snap->nr_areas) return 0; - entry_offset = p_info->act_entry_offset; - while(count < len){ - size = debug_format_entry(p_info); - size = min((len - count), (size - entry_offset)); - - if(size){ - if (copy_to_user(user_buf + count, - p_info->temp_buf + entry_offset, size)) - return -EFAULT; + int formatted_line_size; + int formatted_line_residue; + int user_buf_residue; + size_t copy_size; + + formatted_line_size = debug_format_entry(p_info); + formatted_line_residue = formatted_line_size - entry_offset; + user_buf_residue = len-count; + copy_size = min(user_buf_residue, formatted_line_residue); + if(copy_size){ + if (copy_to_user(user_buf + count, p_info->temp_buf + + entry_offset, copy_size)) + return -EFAULT; + count += copy_size; + entry_offset += copy_size; } - count += size; - entry_offset = 0; - if(count != len) - if(debug_next_entry(p_info)) + if(copy_size == formatted_line_residue){ + entry_offset = 0; + if(debug_next_entry(p_info)) goto out; + } } out: p_info->offset = *offset + count; - p_info->act_entry_offset = size; + p_info->act_entry_offset = entry_offset; *offset = p_info->offset; return count; } @@ -461,9 +559,9 @@ out: * - calls input function of view */ -static ssize_t debug_input(struct file *file, - const char __user *user_buf, size_t length, - loff_t *offset) +static ssize_t +debug_input(struct file *file, const char __user *user_buf, size_t length, + loff_t *offset) { int rc = 0; file_private_info_t *p_info; @@ -487,26 +585,23 @@ static ssize_t debug_input(struct file *file, * handle */ -static int debug_open(struct inode *inode, struct file *file) +static int +debug_open(struct inode *inode, struct file *file) { int i = 0, rc = 0; file_private_info_t *p_info; debug_info_t *debug_info, *debug_info_snapshot; -#ifdef DEBUG - printk("debug_open\n"); -#endif down(&debug_lock); /* find debug log and view */ - debug_info = debug_area_first; while(debug_info != NULL){ for (i = 0; i < DEBUG_MAX_VIEWS; i++) { - if (debug_info->views[i] == NULL) + if (!debug_info->views[i]) continue; - else if (debug_info->proc_entries[i] == - PDE(file->f_dentry->d_inode)) { + else if (debug_info->debugfs_entries[i] == + file->f_dentry) { goto found; /* found view ! 
*/ } } @@ -516,41 +611,42 @@ static int debug_open(struct inode *inode, struct file *file) rc = -EINVAL; goto out; - found: +found: - /* make snapshot of current debug areas to get it consistent */ + /* Make snapshot of current debug areas to get it consistent. */ + /* To copy all the areas is only needed, if we have a view which */ + /* formats the debug areas. */ - debug_info_snapshot = debug_info_copy(debug_info); + if(!debug_info->views[i]->format_proc && + !debug_info->views[i]->header_proc){ + debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); + } else { + debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); + } if(!debug_info_snapshot){ -#ifdef DEBUG - printk(KERN_ERR "debug_open: debug_info_copy failed (out of mem)\n"); -#endif rc = -ENOMEM; goto out; } - - if ((file->private_data = - kmalloc(sizeof(file_private_info_t), GFP_ATOMIC)) == 0) { -#ifdef DEBUG - printk(KERN_ERR "debug_open: kmalloc failed\n"); -#endif - debug_info_free(debug_info_snapshot); + p_info = (file_private_info_t *) kmalloc(sizeof(file_private_info_t), + GFP_KERNEL); + if(!p_info){ + if(debug_info_snapshot) + debug_info_free(debug_info_snapshot); rc = -ENOMEM; goto out; } - p_info = (file_private_info_t *) file->private_data; p_info->offset = 0; p_info->debug_info_snap = debug_info_snapshot; p_info->debug_info_org = debug_info; p_info->view = debug_info->views[i]; p_info->act_area = 0; + p_info->act_page = 0; p_info->act_entry = DEBUG_PROLOG_ENTRY; p_info->act_entry_offset = 0; - + file->private_data = p_info; debug_info_get(debug_info); - - out: +out: up(&debug_lock); return rc; } @@ -561,14 +657,13 @@ static int debug_open(struct inode *inode, struct file *file) * - deletes private_data area of the file handle */ -static int debug_close(struct inode *inode, struct file *file) +static int +debug_close(struct inode *inode, struct file *file) { file_private_info_t *p_info; -#ifdef DEBUG - printk("debug_close\n"); -#endif p_info = (file_private_info_t *) file->private_data; - debug_info_free(p_info->debug_info_snap); + if(p_info->debug_info_snap) + debug_info_free(p_info->debug_info_snap); debug_info_put(p_info->debug_info_org); kfree(file->private_data); return 0; /* success */ @@ -580,8 +675,8 @@ static int debug_close(struct inode *inode, struct file *file) * - returns handle for debug area */ -debug_info_t *debug_register - (char *name, int page_order, int nr_areas, int buf_size) +debug_info_t* +debug_register (char *name, int pages_per_area, int nr_areas, int buf_size) { debug_info_t *rc = NULL; @@ -591,18 +686,14 @@ debug_info_t *debug_register /* create new debug_info */ - rc = debug_info_create(name, page_order, nr_areas, buf_size); + rc = debug_info_create(name, pages_per_area, nr_areas, buf_size); if(!rc) goto out; debug_register_view(rc, &debug_level_view); debug_register_view(rc, &debug_flush_view); -#ifdef DEBUG - printk(KERN_INFO - "debug: reserved %d areas of %d pages for debugging %s\n", - nr_areas, 1 << page_order, rc->name); -#endif - out: - if (rc == NULL){ + debug_register_view(rc, &debug_pages_view); +out: + if (!rc){ printk(KERN_ERR "debug: debug_register failed for %s\n",name); } up(&debug_lock); @@ -614,27 +705,65 @@ debug_info_t *debug_register * - give back debug area */ -void debug_unregister(debug_info_t * id) +void +debug_unregister(debug_info_t * id) { if (!id) goto out; down(&debug_lock); -#ifdef DEBUG - printk(KERN_INFO "debug: unregistering %s\n", id->name); -#endif debug_info_put(id); up(&debug_lock); - out: +out: return; } /* + * debug_set_size: + * - set area 
size (number of pages) and number of areas + */ +static int +debug_set_size(debug_info_t* id, int nr_areas, int pages_per_area) +{ + unsigned long flags; + debug_entry_t *** new_areas; + int rc=0; + + if(!id || (nr_areas <= 0) || (pages_per_area < 0)) + return -EINVAL; + if(pages_per_area > 0){ + new_areas = debug_areas_alloc(pages_per_area, nr_areas); + if(!new_areas) { + printk(KERN_WARNING "debug: could not allocate memory "\ + "for pagenumber: %i\n",pages_per_area); + rc = -ENOMEM; + goto out; + } + } else { + new_areas = NULL; + } + spin_lock_irqsave(&id->lock,flags); + debug_areas_free(id); + id->areas = new_areas; + id->nr_areas = nr_areas; + id->pages_per_area = pages_per_area; + id->active_area = 0; + memset(id->active_entries,0,sizeof(int)*id->nr_areas); + memset(id->active_pages, 0, sizeof(int)*id->nr_areas); + spin_unlock_irqrestore(&id->lock,flags); + printk(KERN_INFO "debug: %s: set new size (%i pages)\n"\ + ,id->name, pages_per_area); +out: + return rc; +} + +/* * debug_set_level: * - set actual debug level */ -void debug_set_level(debug_info_t* id, int new_level) +void +debug_set_level(debug_info_t* id, int new_level) { unsigned long flags; if(!id) @@ -649,10 +778,6 @@ void debug_set_level(debug_info_t* id, int new_level) id->name, new_level, 0, DEBUG_MAX_LEVEL); } else { id->level = new_level; -#ifdef DEBUG - printk(KERN_INFO - "debug: %s: new level %i\n",id->name,id->level); -#endif } spin_unlock_irqrestore(&id->lock,flags); } @@ -663,11 +788,16 @@ void debug_set_level(debug_info_t* id, int new_level) * - set active entry to next in the ring buffer */ -extern inline void proceed_active_entry(debug_info_t * id) +extern inline void +proceed_active_entry(debug_info_t * id) { - if ((id->active_entry[id->active_area] += id->entry_size) - > ((PAGE_SIZE << (id->page_order)) - id->entry_size)) - id->active_entry[id->active_area] = 0; + if ((id->active_entries[id->active_area] += id->entry_size) + > (PAGE_SIZE - id->entry_size)){ + id->active_entries[id->active_area] = 0; + id->active_pages[id->active_area] = + (id->active_pages[id->active_area] + 1) % + id->pages_per_area; + } } /* @@ -675,7 +805,8 @@ extern inline void proceed_active_entry(debug_info_t * id) * - set active area to next in the ring buffer */ -extern inline void proceed_active_area(debug_info_t * id) +extern inline void +proceed_active_area(debug_info_t * id) { id->active_area++; id->active_area = id->active_area % id->nr_areas; @@ -685,10 +816,12 @@ extern inline void proceed_active_area(debug_info_t * id) * get_active_entry: */ -extern inline debug_entry_t *get_active_entry(debug_info_t * id) +extern inline debug_entry_t* +get_active_entry(debug_info_t * id) { - return (debug_entry_t *) ((char *) id->areas[id->active_area] + - id->active_entry[id->active_area]); + return (debug_entry_t *) (((char *) id->areas[id->active_area] + [id->active_pages[id->active_area]]) + + id->active_entries[id->active_area]); } /* @@ -696,8 +829,9 @@ extern inline debug_entry_t *get_active_entry(debug_info_t * id) * - set timestamp, caller address, cpu number etc. 
*/ -extern inline void debug_finish_entry(debug_info_t * id, debug_entry_t* active, - int level, int exception) +extern inline void +debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level, + int exception) { STCK(active->id.stck); active->id.fields.cpuid = smp_processor_id(); @@ -721,7 +855,8 @@ static int debug_active=1; * always allow read, allow write only if debug_stoppable is set or * if debug_active is already off */ -static int s390dbf_procactive(ctl_table *table, int write, struct file *filp, +static int +s390dbf_procactive(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { if (!write || debug_stoppable || !debug_active) @@ -766,7 +901,8 @@ static struct ctl_table s390dbf_dir_table[] = { struct ctl_table_header *s390dbf_sysctl_header; -void debug_stop_all(void) +void +debug_stop_all(void) { if (debug_stoppable) debug_active = 0; @@ -778,13 +914,13 @@ void debug_stop_all(void) * - write debug entry with given size */ -debug_entry_t *debug_event_common(debug_info_t * id, int level, const void *buf, - int len) +debug_entry_t* +debug_event_common(debug_info_t * id, int level, const void *buf, int len) { unsigned long flags; debug_entry_t *active; - if (!debug_active) + if (!debug_active || !id->areas) return NULL; spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); @@ -801,13 +937,13 @@ debug_entry_t *debug_event_common(debug_info_t * id, int level, const void *buf, * - write debug entry with given size and switch to next debug area */ -debug_entry_t *debug_exception_common(debug_info_t * id, int level, - const void *buf, int len) +debug_entry_t +*debug_exception_common(debug_info_t * id, int level, const void *buf, int len) { unsigned long flags; debug_entry_t *active; - if (!debug_active) + if (!debug_active || !id->areas) return NULL; spin_lock_irqsave(&id->lock, flags); active = get_active_entry(id); @@ -823,7 +959,8 @@ debug_entry_t *debug_exception_common(debug_info_t * id, int level, * counts arguments in format string for sprintf view */ -extern inline int debug_count_numargs(char *string) +extern inline int +debug_count_numargs(char *string) { int numargs=0; @@ -838,8 +975,8 @@ extern inline int debug_count_numargs(char *string) * debug_sprintf_event: */ -debug_entry_t *debug_sprintf_event(debug_info_t* id, - int level,char *string,...) +debug_entry_t* +debug_sprintf_event(debug_info_t* id, int level,char *string,...) { va_list ap; int numargs,idx; @@ -849,7 +986,7 @@ debug_entry_t *debug_sprintf_event(debug_info_t* id, if((!id) || (level > id->level)) return NULL; - if (!debug_active) + if (!debug_active || !id->areas) return NULL; numargs=debug_count_numargs(string); @@ -871,8 +1008,8 @@ debug_entry_t *debug_sprintf_event(debug_info_t* id, * debug_sprintf_exception: */ -debug_entry_t *debug_sprintf_exception(debug_info_t* id, - int level,char *string,...) +debug_entry_t* +debug_sprintf_exception(debug_info_t* id, int level,char *string,...) 
{ va_list ap; int numargs,idx; @@ -882,7 +1019,7 @@ debug_entry_t *debug_sprintf_exception(debug_info_t* id, if((!id) || (level > id->level)) return NULL; - if (!debug_active) + if (!debug_active || !id->areas) return NULL; numargs=debug_count_numargs(string); @@ -906,15 +1043,14 @@ debug_entry_t *debug_sprintf_exception(debug_info_t* id, * - is called exactly once to initialize the debug feature */ -static int __init debug_init(void) +static int +__init debug_init(void) { int rc = 0; s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table, 1); down(&debug_lock); -#ifdef CONFIG_PROC_FS - debug_proc_root_entry = proc_mkdir(DEBUG_DIR_ROOT, NULL); -#endif /* CONFIG_PROC_FS */ + debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT,NULL); printk(KERN_INFO "debug: Initialization complete\n"); initialized = 1; up(&debug_lock); @@ -926,13 +1062,14 @@ static int __init debug_init(void) * debug_register_view: */ -int debug_register_view(debug_info_t * id, struct debug_view *view) +int +debug_register_view(debug_info_t * id, struct debug_view *view) { int rc = 0; int i; unsigned long flags; mode_t mode = S_IFREG; - struct proc_dir_entry *pde; + struct dentry *pde; if (!id) goto out; @@ -940,16 +1077,17 @@ int debug_register_view(debug_info_t * id, struct debug_view *view) mode |= S_IRUSR; if (view->input_proc) mode |= S_IWUSR; - pde = create_proc_entry(view->name, mode, id->proc_root_entry); + pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry, + NULL, &debug_file_ops); if (!pde){ - printk(KERN_WARNING "debug: create_proc_entry() failed! Cannot register view %s/%s\n", id->name,view->name); + printk(KERN_WARNING "debug: debugfs_create_file() failed!"\ + " Cannot register view %s/%s\n", id->name,view->name); rc = -1; goto out; } - spin_lock_irqsave(&id->lock, flags); for (i = 0; i < DEBUG_MAX_VIEWS; i++) { - if (id->views[i] == NULL) + if (!id->views[i]) break; } if (i == DEBUG_MAX_VIEWS) { @@ -957,16 +1095,14 @@ int debug_register_view(debug_info_t * id, struct debug_view *view) id->name,view->name); printk(KERN_WARNING "debug: maximum number of views reached (%i)!\n", i); - remove_proc_entry(pde->name, id->proc_root_entry); + debugfs_remove(pde); rc = -1; - } - else { + } else { id->views[i] = view; - pde->proc_fops = &debug_file_ops; - id->proc_entries[i] = pde; + id->debugfs_entries[i] = pde; } spin_unlock_irqrestore(&id->lock, flags); - out: +out: return rc; } @@ -974,7 +1110,8 @@ int debug_register_view(debug_info_t * id, struct debug_view *view) * debug_unregister_view: */ -int debug_unregister_view(debug_info_t * id, struct debug_view *view) +int +debug_unregister_view(debug_info_t * id, struct debug_view *view) { int rc = 0; int i; @@ -990,15 +1127,46 @@ int debug_unregister_view(debug_info_t * id, struct debug_view *view) if (i == DEBUG_MAX_VIEWS) rc = -1; else { -#ifdef CONFIG_PROC_FS - remove_proc_entry(id->proc_entries[i]->name, - id->proc_root_entry); -#endif + debugfs_remove(id->debugfs_entries[i]); id->views[i] = NULL; rc = 0; } spin_unlock_irqrestore(&id->lock, flags); - out: +out: + return rc; +} + +static inline char * +debug_get_user_string(const char __user *user_buf, size_t user_len) +{ + char* buffer; + + buffer = kmalloc(user_len + 1, GFP_KERNEL); + if (!buffer) + return ERR_PTR(-ENOMEM); + if (copy_from_user(buffer, user_buf, user_len) != 0) { + kfree(buffer); + return ERR_PTR(-EFAULT); + } + /* got the string, now strip linefeed. 
*/ + if (buffer[user_len - 1] == '\n') + buffer[user_len - 1] = 0; + else + buffer[user_len] = 0; + return buffer; +} + +static inline int +debug_get_uint(char *buf) +{ + int rc; + + for(; isspace(*buf); buf++); + rc = simple_strtoul(buf, &buf, 10); + if(*buf){ + printk("debug: no integer specified!\n"); + rc = -EINVAL; + } return rc; } @@ -1011,13 +1179,69 @@ int debug_unregister_view(debug_info_t * id, struct debug_view *view) * prints out actual debug level */ -static int debug_prolog_level_fn(debug_info_t * id, +static int +debug_prolog_pages_fn(debug_info_t * id, struct debug_view *view, char *out_buf) { + return sprintf(out_buf, "%i\n", id->pages_per_area); +} + +/* + * reads new size (number of pages per debug area) + */ + +static int +debug_input_pages_fn(debug_info_t * id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_len, loff_t * offset) +{ + char *str; + int rc,new_pages; + + if (user_len > 0x10000) + user_len = 0x10000; + if (*offset != 0){ + rc = -EPIPE; + goto out; + } + str = debug_get_user_string(user_buf,user_len); + if(IS_ERR(str)){ + rc = PTR_ERR(str); + goto out; + } + new_pages = debug_get_uint(str); + if(new_pages < 0){ + rc = -EINVAL; + goto free_str; + } + rc = debug_set_size(id,id->nr_areas, new_pages); + if(rc != 0){ + rc = -EINVAL; + goto free_str; + } + rc = user_len; +free_str: + kfree(str); +out: + *offset += user_len; + return rc; /* number of input characters */ +} + +/* + * prints out actual debug level + */ + +static int +debug_prolog_level_fn(debug_info_t * id, struct debug_view *view, char *out_buf) +{ int rc = 0; - if(id->level == -1) rc = sprintf(out_buf,"-\n"); - else rc = sprintf(out_buf, "%i\n", id->level); + if(id->level == DEBUG_OFF_LEVEL) { + rc = sprintf(out_buf,"-\n"); + } + else { + rc = sprintf(out_buf, "%i\n", id->level); + } return rc; } @@ -1025,30 +1249,43 @@ static int debug_prolog_level_fn(debug_info_t * id, * reads new debug level */ -static int debug_input_level_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t in_buf_size, loff_t * offset) +static int +debug_input_level_fn(debug_info_t * id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_len, loff_t * offset) { - char input_buf[1]; - int rc = in_buf_size; + char *str; + int rc,new_level; - if (*offset != 0) + if (user_len > 0x10000) + user_len = 0x10000; + if (*offset != 0){ + rc = -EPIPE; goto out; - if (copy_from_user(input_buf, user_buf, 1)){ - rc = -EFAULT; + } + str = debug_get_user_string(user_buf,user_len); + if(IS_ERR(str)){ + rc = PTR_ERR(str); goto out; } - if (isdigit(input_buf[0])) { - int new_level = ((int) input_buf[0] - (int) '0'); - debug_set_level(id, new_level); - } else if(input_buf[0] == '-') { + if(str[0] == '-'){ debug_set_level(id, DEBUG_OFF_LEVEL); + rc = user_len; + goto free_str; } else { - printk(KERN_INFO "debug: level `%c` is not valid\n", - input_buf[0]); + new_level = debug_get_uint(str); } - out: - *offset += in_buf_size; + if(new_level < 0) { + printk(KERN_INFO "debug: level `%s` is not valid\n", str); + rc = -EINVAL; + } else { + debug_set_level(id, new_level); + rc = user_len; + } +free_str: + kfree(str); +out: + *offset += user_len; return rc; /* number of input characters */ } @@ -1057,29 +1294,36 @@ static int debug_input_level_fn(debug_info_t * id, struct debug_view *view, * flushes debug areas */ -void debug_flush(debug_info_t* id, int area) +void +debug_flush(debug_info_t* id, int area) { unsigned long 
flags; - int i; + int i,j; - if(!id) + if(!id || !id->areas) return; spin_lock_irqsave(&id->lock,flags); if(area == DEBUG_FLUSH_ALL){ id->active_area = 0; - memset(id->active_entry, 0, id->nr_areas * sizeof(int)); - for (i = 0; i < id->nr_areas; i++) - memset(id->areas[i], 0, PAGE_SIZE << id->page_order); + memset(id->active_entries, 0, id->nr_areas * sizeof(int)); + for (i = 0; i < id->nr_areas; i++) { + id->active_pages[i] = 0; + for(j = 0; j < id->pages_per_area; j++) { + memset(id->areas[i][j], 0, PAGE_SIZE); + } + } printk(KERN_INFO "debug: %s: all areas flushed\n",id->name); } else if(area >= 0 && area < id->nr_areas) { - id->active_entry[area] = 0; - memset(id->areas[area], 0, PAGE_SIZE << id->page_order); - printk(KERN_INFO - "debug: %s: area %i has been flushed\n", + id->active_entries[area] = 0; + id->active_pages[area] = 0; + for(i = 0; i < id->pages_per_area; i++) { + memset(id->areas[area][i],0,PAGE_SIZE); + } + printk(KERN_INFO "debug: %s: area %i has been flushed\n", id->name, area); } else { printk(KERN_INFO - "debug: %s: area %i cannot be flushed (range: %i - %i)\n", + "debug: %s: area %i cannot be flushed (range: %i - %i)\n", id->name, area, 0, id->nr_areas-1); } spin_unlock_irqrestore(&id->lock,flags); @@ -1089,15 +1333,20 @@ void debug_flush(debug_info_t* id, int area) * view function: flushes debug areas */ -static int debug_input_flush_fn(debug_info_t * id, struct debug_view *view, - struct file *file, const char __user *user_buf, - size_t in_buf_size, loff_t * offset) +static int +debug_input_flush_fn(debug_info_t * id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_len, loff_t * offset) { char input_buf[1]; - int rc = in_buf_size; - - if (*offset != 0) + int rc = user_len; + + if (user_len > 0x10000) + user_len = 0x10000; + if (*offset != 0){ + rc = -EPIPE; goto out; + } if (copy_from_user(input_buf, user_buf, 1)){ rc = -EFAULT; goto out; @@ -1114,8 +1363,8 @@ static int debug_input_flush_fn(debug_info_t * id, struct debug_view *view, printk(KERN_INFO "debug: area `%c` is not valid\n", input_buf[0]); - out: - *offset += in_buf_size; +out: + *offset += user_len; return rc; /* number of input characters */ } @@ -1123,8 +1372,9 @@ static int debug_input_flush_fn(debug_info_t * id, struct debug_view *view, * prints debug header in raw format */ -int debug_raw_header_fn(debug_info_t * id, struct debug_view *view, - int area, debug_entry_t * entry, char *out_buf) +static int +debug_raw_header_fn(debug_info_t * id, struct debug_view *view, + int area, debug_entry_t * entry, char *out_buf) { int rc; @@ -1137,7 +1387,8 @@ int debug_raw_header_fn(debug_info_t * id, struct debug_view *view, * prints debug data in raw format */ -static int debug_raw_format_fn(debug_info_t * id, struct debug_view *view, +static int +debug_raw_format_fn(debug_info_t * id, struct debug_view *view, char *out_buf, const char *in_buf) { int rc; @@ -1151,8 +1402,9 @@ static int debug_raw_format_fn(debug_info_t * id, struct debug_view *view, * prints debug data in hex/ascii format */ -static int debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, - char *out_buf, const char *in_buf) +static int +debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, + char *out_buf, const char *in_buf) { int i, rc = 0; @@ -1176,7 +1428,8 @@ static int debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view, * prints header for debug entry */ -int debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, +int 
+debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, int area, debug_entry_t * entry, char *out_buf) { struct timeval time_val; @@ -1210,8 +1463,9 @@ int debug_dflt_header_fn(debug_info_t * id, struct debug_view *view, #define DEBUG_SPRINTF_MAX_ARGS 10 -int debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view, - char *out_buf, debug_sprintf_entry_t *curr_event) +static int +debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view, + char *out_buf, debug_sprintf_entry_t *curr_event) { int num_longs, num_used_args = 0,i, rc = 0; int index[DEBUG_SPRINTF_MAX_ARGS]; @@ -1251,14 +1505,10 @@ out: /* * clean up module */ -void __exit debug_exit(void) +void +__exit debug_exit(void) { -#ifdef DEBUG - printk("debug_cleanup_module: \n"); -#endif -#ifdef CONFIG_PROC_FS - remove_proc_entry(debug_proc_root_entry->name, NULL); -#endif /* CONFIG_PROC_FS */ + debugfs_remove(debug_debugfs_root_entry); unregister_sysctl_table(s390dbf_sysctl_header); return; } @@ -1266,7 +1516,7 @@ void __exit debug_exit(void) /* * module definitions */ -core_initcall(debug_init); +postcore_initcall(debug_init); module_exit(debug_exit); MODULE_LICENSE("GPL"); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index c0e09b33febe..5b262b5d869f 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -7,6 +7,7 @@ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Heiko Carstens <heiko.carstens@de.ibm.com> */ #include <linux/sys.h> @@ -49,9 +50,9 @@ SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC SP_TRAP = STACK_FRAME_OVERHEAD + __PT_TRAP SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE -_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ +_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING | \ _TIF_RESTART_SVC | _TIF_SINGLE_STEP ) -_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED) +_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT @@ -121,7 +122,11 @@ STACK_SIZE = 1 << STACK_SHIFT bz BASED(stack_overflow) 3: #endif -2: s %r15,BASED(.Lc_spsize) # make room for registers & psw +2: + .endm + + .macro CREATE_STACK_FRAME psworg,savearea + s %r15,BASED(.Lc_spsize) # make room for registers & psw mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack la %r12,\psworg st %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 @@ -161,6 +166,13 @@ __switch_to_base: be __switch_to_noper-__switch_to_base(%r1) # we got away w/o bashing TLB's lctl %c9,%c11,__THREAD_per(%r3) # Nope we didn't __switch_to_noper: + l %r4,__THREAD_info(%r2) # get thread_info of prev + tm __TI_flags+3(%r4),_TIF_MCCK_PENDING # machine check pending? 
+ bz __switch_to_no_mcck-__switch_to_base(%r1) + ni __TI_flags+3(%r4),255-_TIF_MCCK_PENDING # clear flag in prev + l %r4,__THREAD_info(%r3) # get thread_info of next + oi __TI_flags+3(%r4),_TIF_MCCK_PENDING # set it in next +__switch_to_no_mcck: stm %r6,%r15,__SF_GPRS(%r15)# store __switch_to registers of prev task st %r15,__THREAD_ksp(%r2) # store kernel stack to prev->tss.ksp l %r15,__THREAD_ksp(%r3) # load kernel stack from next->tss.ksp @@ -185,6 +197,7 @@ system_call: sysc_saveall: SAVE_ALL_BASE __LC_SAVE_AREA SAVE_ALL __LC_SVC_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA lh %r7,0x8a # get svc number from lowcore #ifdef CONFIG_VIRT_CPU_ACCOUNTING sysc_vtime: @@ -234,6 +247,8 @@ sysc_work_loop: # One of the work bits is on. Find out which one. # sysc_work: + tm __TI_flags+3(%r9),_TIF_MCCK_PENDING + bo BASED(sysc_mcck_pending) tm __TI_flags+3(%r9),_TIF_NEED_RESCHED bo BASED(sysc_reschedule) tm __TI_flags+3(%r9),_TIF_SIGPENDING @@ -253,6 +268,14 @@ sysc_reschedule: br %r1 # call scheduler # +# _TIF_MCCK_PENDING is set, call handler +# +sysc_mcck_pending: + l %r1,BASED(.Ls390_handle_mcck) + la %r14,BASED(sysc_work_loop) + br %r1 # TIF bit will be cleared by handler + +# # _TIF_SIGPENDING is set, call do_signal # sysc_sigpending: @@ -430,6 +453,7 @@ pgm_check_handler: tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception bnz BASED(pgm_per) # got per exception -> special case SAVE_ALL __LC_PGM_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? bz BASED(pgm_no_vtime) @@ -468,6 +492,7 @@ pgm_per: # pgm_per_std: SAVE_ALL __LC_PGM_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? bz BASED(pgm_no_vtime2) @@ -493,6 +518,7 @@ pgm_no_vtime2: # pgm_svcper: SAVE_ALL __LC_SVC_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? bz BASED(pgm_no_vtime3) @@ -521,6 +547,7 @@ io_int_handler: stck __LC_INT_CLOCK SAVE_ALL_BASE __LC_SAVE_AREA+16 SAVE_ALL __LC_IO_OLD_PSW,__LC_SAVE_AREA+16,0 + CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? bz BASED(io_no_vtime) @@ -578,9 +605,11 @@ io_work: lr %r15,%r1 # # One of the work bits is on. Find out which one. -# Checked are: _TIF_SIGPENDING and _TIF_NEED_RESCHED +# Checked are: _TIF_SIGPENDING, _TIF_NEED_RESCHED and _TIF_MCCK_PENDING # io_work_loop: + tm __TI_flags+3(%r9),_TIF_MCCK_PENDING + bo BASED(io_mcck_pending) tm __TI_flags+3(%r9),_TIF_NEED_RESCHED bo BASED(io_reschedule) tm __TI_flags+3(%r9),_TIF_SIGPENDING @@ -588,6 +617,14 @@ io_work_loop: b BASED(io_leave) # +# _TIF_MCCK_PENDING is set, call handler +# +io_mcck_pending: + l %r1,BASED(.Ls390_handle_mcck) + l %r14,BASED(io_work_loop) + br %r1 # TIF bit will be cleared by handler + +# # _TIF_NEED_RESCHED is set, call schedule # io_reschedule: @@ -621,6 +658,7 @@ ext_int_handler: stck __LC_INT_CLOCK SAVE_ALL_BASE __LC_SAVE_AREA+16 SAVE_ALL __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16,0 + CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? 
bz BASED(ext_no_vtime) @@ -642,19 +680,62 @@ ext_no_vtime: .globl mcck_int_handler mcck_int_handler: - STORE_TIMER __LC_ASYNC_ENTER_TIMER + spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer + lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs SAVE_ALL_BASE __LC_SAVE_AREA+32 - SAVE_ALL __LC_MCK_OLD_PSW,__LC_SAVE_AREA+32,0 + la %r12,__LC_MCK_OLD_PSW + tm __LC_MCCK_CODE,0x80 # system damage? + bo BASED(mcck_int_main) # yes -> rest of mcck code invalid + tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? + bo BASED(0f) + spt __LC_LAST_UPDATE_TIMER # revalidate cpu timer #ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? + mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__LC_EXIT_TIMER +0: tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid? + bno BASED(mcck_no_vtime) # no -> skip cleanup critical + tm __LC_MCK_OLD_PSW+1,0x01 # interrupting from user ? bz BASED(mcck_no_vtime) UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER mcck_no_vtime: #endif +0: + tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? + bno BASED(mcck_int_main) # no -> skip cleanup critical + tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit + bnz BASED(mcck_int_main) # from user -> load async stack + clc __LC_MCK_OLD_PSW+4(4),BASED(.Lcritical_end) + bhe BASED(mcck_int_main) + clc __LC_MCK_OLD_PSW+4(4),BASED(.Lcritical_start) + bl BASED(mcck_int_main) + l %r14,BASED(.Lcleanup_critical) + basr %r14,%r14 +mcck_int_main: + l %r14,__LC_PANIC_STACK # are we already on the panic stack? + slr %r14,%r15 + sra %r14,PAGE_SHIFT + be BASED(0f) + l %r15,__LC_PANIC_STACK # load panic stack +0: CREATE_STACK_FRAME __LC_MCK_OLD_PSW,__LC_SAVE_AREA+32 + l %r9,__LC_THREAD_INFO # load pointer to thread_info struct + la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ls390_mcck) - basr %r14,%r1 # call machine check handler + basr %r14,%r1 # call machine check handler + tm SP_PSW+1(%r15),0x01 # returning to user ? 
+ bno BASED(mcck_return) + l %r1,__LC_KERNEL_STACK # switch to kernel stack + s %r1,BASED(.Lc_spsize) + mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) + xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain + lr %r15,%r1 + stosm __SF_EMPTY(%r15),0x04 # turn dat on + tm __TI_flags+3(%r9),_TIF_MCCK_PENDING + bno BASED(mcck_return) + l %r1,BASED(.Ls390_handle_mcck) + basr %r14,%r1 # call machine check handler mcck_return: RESTORE_ALL 0 @@ -742,7 +823,7 @@ cleanup_critical: clc 4(4,%r12),BASED(cleanup_table_sysc_work_loop) bl BASED(0f) clc 4(4,%r12),BASED(cleanup_table_sysc_work_loop+4) - bl BASED(cleanup_sysc_leave) + bl BASED(cleanup_sysc_return) 0: br %r14 @@ -760,6 +841,7 @@ cleanup_system_call: mvc __LC_SAVE_AREA(16),__LC_SAVE_AREA+16 0: st %r13,__LC_SAVE_AREA+20 SAVE_ALL __LC_SVC_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA st %r15,__LC_SAVE_AREA+28 lh %r7,0x8a #ifdef CONFIG_VIRT_CPU_ACCOUNTING @@ -834,6 +916,8 @@ cleanup_sysc_leave_insn: * Symbol constants */ .Ls390_mcck: .long s390_do_machine_check +.Ls390_handle_mcck: + .long s390_handle_mcck .Ldo_IRQ: .long do_IRQ .Ldo_extint: .long do_extint .Ldo_signal: .long do_signal diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 51527ab8c8f9..57ca75d0ad7f 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -7,6 +7,7 @@ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * Heiko Carstens <heiko.carstens@de.ibm.com> */ #include <linux/sys.h> @@ -52,9 +53,9 @@ SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT -_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ +_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING | \ _TIF_RESTART_SVC | _TIF_SINGLE_STEP ) -_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED) +_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) #define BASED(name) name-system_call(%r13) @@ -114,7 +115,11 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NEED_RESCHED) jz stack_overflow 3: #endif -2: aghi %r15,-SP_SIZE # make room for registers & psw +2: + .endm + + .macro CREATE_STACK_FRAME psworg,savearea + aghi %r15,-SP_SIZE # make room for registers & psw mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack la %r12,\psworg stg %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 @@ -152,6 +157,13 @@ __switch_to: je __switch_to_noper # we got away without bashing TLB's lctlg %c9,%c11,__THREAD_per(%r3) # Nope we didn't __switch_to_noper: + lg %r4,__THREAD_info(%r2) # get thread_info of prev + tm __TI_flags+7(%r4),_TIF_MCCK_PENDING # machine check pending? + jz __switch_to_no_mcck + ni __TI_flags+7(%r4),255-_TIF_MCCK_PENDING # clear flag in prev + lg %r4,__THREAD_info(%r3) # get thread_info of next + oi __TI_flags+7(%r4),_TIF_MCCK_PENDING # set it in next +__switch_to_no_mcck: stmg %r6,%r15,__SF_GPRS(%r15)# store __switch_to registers of prev task stg %r15,__THREAD_ksp(%r2) # store kernel stack to prev->tss.ksp lg %r15,__THREAD_ksp(%r3) # load kernel stack from next->tss.ksp @@ -176,6 +188,7 @@ system_call: sysc_saveall: SAVE_ALL_BASE __LC_SAVE_AREA SAVE_ALL __LC_SVC_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA llgh %r7,__LC_SVC_INT_CODE # get svc number from lowcore #ifdef CONFIG_VIRT_CPU_ACCOUNTING sysc_vtime: @@ -232,6 +245,8 @@ sysc_work_loop: # One of the work bits is on. Find out which one. 
# sysc_work: + tm __TI_flags+7(%r9),_TIF_MCCK_PENDING + jo sysc_mcck_pending tm __TI_flags+7(%r9),_TIF_NEED_RESCHED jo sysc_reschedule tm __TI_flags+7(%r9),_TIF_SIGPENDING @@ -250,6 +265,13 @@ sysc_reschedule: jg schedule # return point is sysc_return # +# _TIF_MCCK_PENDING is set, call handler +# +sysc_mcck_pending: + larl %r14,sysc_work_loop + jg s390_handle_mcck # TIF bit will be cleared by handler + +# # _TIF_SIGPENDING is set, call do_signal # sysc_sigpending: @@ -474,6 +496,7 @@ pgm_check_handler: tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception jnz pgm_per # got per exception -> special case SAVE_ALL __LC_PGM_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz pgm_no_vtime @@ -512,6 +535,7 @@ pgm_per: # pgm_per_std: SAVE_ALL __LC_PGM_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz pgm_no_vtime2 @@ -537,6 +561,7 @@ pgm_no_vtime2: # pgm_svcper: SAVE_ALL __LC_SVC_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz pgm_no_vtime3 @@ -564,6 +589,7 @@ io_int_handler: stck __LC_INT_CLOCK SAVE_ALL_BASE __LC_SAVE_AREA+32 SAVE_ALL __LC_IO_OLD_PSW,__LC_SAVE_AREA+32,0 + CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz io_no_vtime @@ -621,9 +647,11 @@ io_work: lgr %r15,%r1 # # One of the work bits is on. Find out which one. -# Checked are: _TIF_SIGPENDING and _TIF_NEED_RESCHED +# Checked are: _TIF_SIGPENDING, _TIF_NEED_RESCHED and _TIF_MCCK_PENDING # io_work_loop: + tm __TI_flags+7(%r9),_TIF_MCCK_PENDING + jo io_mcck_pending tm __TI_flags+7(%r9),_TIF_NEED_RESCHED jo io_reschedule tm __TI_flags+7(%r9),_TIF_SIGPENDING @@ -631,6 +659,13 @@ io_work_loop: j io_leave # +# _TIF_MCCK_PENDING is set, call handler +# +io_mcck_pending: + larl %r14,io_work_loop + jg s390_handle_mcck # TIF bit will be cleared by handler + +# # _TIF_NEED_RESCHED is set, call schedule # io_reschedule: @@ -661,6 +696,7 @@ ext_int_handler: stck __LC_INT_CLOCK SAVE_ALL_BASE __LC_SAVE_AREA+32 SAVE_ALL __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32,0 + CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 #ifdef CONFIG_VIRT_CPU_ACCOUNTING tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz ext_no_vtime @@ -680,18 +716,60 @@ ext_no_vtime: */ .globl mcck_int_handler mcck_int_handler: - STORE_TIMER __LC_ASYNC_ENTER_TIMER + la %r1,4095 # revalidate r1 + spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer + lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs SAVE_ALL_BASE __LC_SAVE_AREA+64 - SAVE_ALL __LC_MCK_OLD_PSW,__LC_SAVE_AREA+64,0 + la %r12,__LC_MCK_OLD_PSW + tm __LC_MCCK_CODE,0x80 # system damage? + jo mcck_int_main # yes -> rest of mcck code invalid + tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? + jo 0f + spt __LC_LAST_UPDATE_TIMER #ifdef CONFIG_VIRT_CPU_ACCOUNTING - tm SP_PSW+1(%r15),0x01 # interrupting from user ? + mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__LC_EXIT_TIMER +0: tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid? + jno mcck_no_vtime # no -> no timer update + tm __LC_MCK_OLD_PSW+1,0x01 # interrupting from user ? 
jz mcck_no_vtime UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER mcck_no_vtime: #endif - brasl %r14,s390_do_machine_check +0: + tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? + jno mcck_int_main # no -> skip cleanup critical + tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit + jnz mcck_int_main # from user -> load kernel stack + clc __LC_MCK_OLD_PSW+8(8),BASED(.Lcritical_end) + jhe mcck_int_main + clc __LC_MCK_OLD_PSW+8(8),BASED(.Lcritical_start) + jl mcck_int_main + brasl %r14,cleanup_critical +mcck_int_main: + lg %r14,__LC_PANIC_STACK # are we already on the panic stack? + slgr %r14,%r15 + srag %r14,%r14,PAGE_SHIFT + jz 0f + lg %r15,__LC_PANIC_STACK # load panic stack +0: CREATE_STACK_FRAME __LC_MCK_OLD_PSW,__LC_SAVE_AREA+64 + lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct + la %r2,SP_PTREGS(%r15) # load pt_regs + brasl %r14,s390_do_machine_check + tm SP_PSW+1(%r15),0x01 # returning to user ? + jno mcck_return + lg %r1,__LC_KERNEL_STACK # switch to kernel stack + aghi %r1,-SP_SIZE + mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) + xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain + lgr %r15,%r1 + stosm __SF_EMPTY(%r15),0x04 # turn dat on + tm __TI_flags+7(%r9),_TIF_MCCK_PENDING + jno mcck_return + brasl %r14,s390_handle_mcck mcck_return: RESTORE_ALL 0 @@ -775,7 +853,7 @@ cleanup_critical: clc 8(8,%r12),BASED(cleanup_table_sysc_work_loop) jl 0f clc 8(8,%r12),BASED(cleanup_table_sysc_work_loop+8) - jl cleanup_sysc_leave + jl cleanup_sysc_return 0: br %r14 @@ -793,6 +871,7 @@ cleanup_system_call: mvc __LC_SAVE_AREA(32),__LC_SAVE_AREA+32 0: stg %r13,__LC_SAVE_AREA+40 SAVE_ALL __LC_SVC_OLD_PSW,__LC_SAVE_AREA,1 + CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA stg %r15,__LC_SAVE_AREA+56 llgh %r7,__LC_SVC_INT_CODE #ifdef CONFIG_VIRT_CPU_ACCOUNTING diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c new file mode 100644 index 000000000000..2721c3a32b84 --- /dev/null +++ b/arch/s390/kernel/machine_kexec.c @@ -0,0 +1,98 @@ +/* + * arch/s390/kernel/machine_kexec.c + * + * (C) Copyright IBM Corp. 2005 + * + * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com> + * + */ + +/* + * s390_machine_kexec.c - handle the transition of Linux booting another kernel + * on the S390 architecture. + */ + +#include <asm/cio.h> +#include <asm/setup.h> +#include <linux/device.h> +#include <linux/mm.h> +#include <linux/kexec.h> +#include <linux/delay.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/system.h> + +static void kexec_halt_all_cpus(void *); + +typedef void (*relocate_kernel_t) (kimage_entry_t *, unsigned long); + +const extern unsigned char relocate_kernel[]; +const extern unsigned long long relocate_kernel_len; + +int +machine_kexec_prepare(struct kimage *image) +{ + unsigned long reboot_code_buffer; + + /* We don't support anything but the default image type for now. 
*/ + if (image->type != KEXEC_TYPE_DEFAULT) + return -EINVAL; + + /* Get the destination where the assembler code should be copied to.*/ + reboot_code_buffer = page_to_pfn(image->control_code_page)<<PAGE_SHIFT; + + /* Then copy it */ + memcpy((void *) reboot_code_buffer, relocate_kernel, + relocate_kernel_len); + return 0; +} + +void +machine_kexec_cleanup(struct kimage *image) +{ +} + +void +machine_shutdown(void) +{ + printk(KERN_INFO "kexec: machine_shutdown called\n"); +} + +NORET_TYPE void +machine_kexec(struct kimage *image) +{ + clear_all_subchannels(); + + /* Disable lowcore protection */ + ctl_clear_bit(0,28); + + on_each_cpu(kexec_halt_all_cpus, image, 0, 0); + for (;;); +} + +static void +kexec_halt_all_cpus(void *kernel_image) +{ + static atomic_t cpuid = ATOMIC_INIT(-1); + int cpu; + struct kimage *image; + relocate_kernel_t data_mover; + + if (atomic_compare_and_swap(-1, smp_processor_id(), &cpuid)) + signal_processor(smp_processor_id(), sigp_stop); + + /* Wait for all other cpus to enter stopped state */ + for_each_online_cpu(cpu) { + if (cpu == smp_processor_id()) + continue; + while (!smp_cpu_not_running(cpu)) + cpu_relax(); + } + + image = (struct kimage *) kernel_image; + data_mover = (relocate_kernel_t) + (page_to_pfn(image->control_code_page) << PAGE_SHIFT); + + /* Call the moving routine */ + (*data_mover) (&image->head, image->start); +} diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 7aea25d6e300..9f3dff6c0b72 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -91,13 +91,12 @@ void do_monitor_call(struct pt_regs *regs, long interruption_code) (void *)(long) smp_processor_id()); } +extern void s390_handle_mcck(void); /* * The idle loop on a S390... */ void default_idle(void) { - psw_t wait_psw; - unsigned long reg; int cpu, rc; local_irq_disable(); @@ -125,38 +124,17 @@ void default_idle(void) cpu_die(); #endif - /* - * Wait for external, I/O or machine check interrupt and - * switch off machine check bit after the wait has ended. - */ - wait_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK | PSW_MASK_WAIT | - PSW_MASK_IO | PSW_MASK_EXT; -#ifndef CONFIG_ARCH_S390X - asm volatile ( - " basr %0,0\n" - "0: la %0,1f-0b(%0)\n" - " st %0,4(%1)\n" - " oi 4(%1),0x80\n" - " lpsw 0(%1)\n" - "1: la %0,2f-1b(%0)\n" - " st %0,4(%1)\n" - " oi 4(%1),0x80\n" - " ni 1(%1),0xf9\n" - " lpsw 0(%1)\n" - "2:" - : "=&a" (reg) : "a" (&wait_psw) : "memory", "cc" ); -#else /* CONFIG_ARCH_S390X */ - asm volatile ( - " larl %0,0f\n" - " stg %0,8(%1)\n" - " lpswe 0(%1)\n" - "0: larl %0,1f\n" - " stg %0,8(%1)\n" - " ni 1(%1),0xf9\n" - " lpswe 0(%1)\n" - "1:" - : "=&a" (reg) : "a" (&wait_psw) : "memory", "cc" ); -#endif /* CONFIG_ARCH_S390X */ + local_mcck_disable(); + if (test_thread_flag(TIF_MCCK_PENDING)) { + local_mcck_enable(); + local_irq_enable(); + s390_handle_mcck(); + return; + } + + /* Wait for external, I/O or machine check interrupt. */ + __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_WAIT | + PSW_MASK_IO | PSW_MASK_EXT); } void cpu_idle(void) diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S new file mode 100644 index 000000000000..d5e4a62fbb79 --- /dev/null +++ b/arch/s390/kernel/relocate_kernel.S @@ -0,0 +1,81 @@ +/* + * arch/s390/kernel/relocate_kernel.S + * + * (C) Copyright IBM Corp. 2005 + * + * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com> + * + */ + +/* + * moves the new kernel to its destination... 
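The relocate_kernel assembler that follows (31-bit here, 64-bit further down) walks the kimage entry list built by sys_kexec_load with DAT and interrupts off. As a reading aid, here is a rough C equivalent of that walk, using the standard entry flags from <linux/kexec.h>; this is a sketch only, the real routine copies with MVCLE and cannot call into kernel services:

#include <linux/kexec.h>
#include <linux/string.h>
#include <asm/page.h>

static void walk_kimage_list(kimage_entry_t *entry, unsigned long page_mask)
{
	unsigned long dest = 0;

	for (;;) {
		unsigned long e = *entry++;

		if (e & IND_DESTINATION)	/* 0x1: set the copy target */
			dest = e & page_mask;
		else if (e & IND_INDIRECTION)	/* 0x2: continue in another list page */
			entry = (kimage_entry_t *) (e & page_mask);
		else if (e & IND_DONE)		/* 0x4: end of list, jump to the new kernel */
			break;
		else if (e & IND_SOURCE) {	/* 0x8: copy one page, advance the target */
			memcpy((void *) dest, (void *) (e & page_mask), PAGE_SIZE);
			dest += PAGE_SIZE;
		}
	}
}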
+ * %r2 = pointer to first kimage_entry_t + * %r3 = start address - where to jump to after the job is done... + * + * %r5 will be used as temp. storage + * %r6 holds the destination address + * %r7 = PAGE_SIZE + * %r8 holds the source address + * %r9 = PAGE_SIZE + * %r10 is a page mask + */ + + .text + .globl relocate_kernel + relocate_kernel: + basr %r13,0 #base address + .base: + spx zero64-.base(%r13) #absolute addressing mode + stnsm sys_msk-.base(%r13),0xf8 #disable DAT and IRQ (external) + lhi %r10,-1 #preparing the mask + sll %r10,12 #shift it such that it becomes 0xf000 + .top: + lhi %r7,4096 #load PAGE_SIZE in r7 + lhi %r9,4096 #load PAGE_SIZE in r9 + l %r5,0(%r2) #read another word for indirection page + ahi %r2,4 #increment pointer + tml %r5,0x1 #is it a destination page? + je .indir_check #NO, goto "indir_check" + lr %r6,%r5 #r6 = r5 + nr %r6,%r10 #mask it out and... + j .top #...next iteration + .indir_check: + tml %r5,0x2 #is it a indirection page? + je .done_test #NO, goto "done_test" + nr %r5,%r10 #YES, mask out, + lr %r2,%r5 #move it into the right register, + j .top #and read next... + .done_test: + tml %r5,0x4 #is it the done indicator? + je .source_test #NO! Well, then it should be the source indicator... + j .done #ok, lets finish it here... + .source_test: + tml %r5,0x8 #it should be a source indicator... + je .top #NO, ignore it... + lr %r8,%r5 #r8 = r5 + nr %r8,%r10 #masking + 0: mvcle %r6,%r8,0x0 #copy PAGE_SIZE bytes from r8 to r6 - pad with 0 + jo 0b + j .top + .done: + sr %r0,%r0 #clear register r0 + la %r4,load_psw-.base(%r13) #load psw-address into the register + o %r3,4(%r4) #or load address into psw + st %r3,4(%r4) + mvc 0(8,%r0),0(%r4) #copy psw to absolute address 0 + sr %r1,%r1 #clear %r1 + sr %r2,%r2 #clear %r2 + sigp %r1,%r2,0x12 #set cpuid to zero + lpsw 0 #hopefully start new kernel... + + .align 8 + zero64: + .quad 0 + load_psw: + .long 0x00080000,0x80000000 + sys_msk: + .quad 0 + relocate_kernel_end: + .globl relocate_kernel_len + relocate_kernel_len: + .quad relocate_kernel_end - relocate_kernel diff --git a/arch/s390/kernel/relocate_kernel64.S b/arch/s390/kernel/relocate_kernel64.S new file mode 100644 index 000000000000..96290cc4eb3c --- /dev/null +++ b/arch/s390/kernel/relocate_kernel64.S @@ -0,0 +1,82 @@ +/* + * arch/s390/kernel/relocate_kernel64.S + * + * (C) Copyright IBM Corp. 2005 + * + * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com> + * + */ + +/* + * moves the new kernel to its destination... + * %r2 = pointer to first kimage_entry_t + * %r3 = start address - where to jump to after the job is done... + * + * %r5 will be used as temp. storage + * %r6 holds the destination address + * %r7 = PAGE_SIZE + * %r8 holds the source address + * %r9 = PAGE_SIZE + * + * 0xf000 is a page_mask + */ + + .text + .globl relocate_kernel + relocate_kernel: + basr %r13,0 #base address + .base: + spx zero64-.base(%r13) #absolute addressing mode + stnsm sys_msk-.base(%r13),0xf8 #disable DAT and IRQ (external) + .top: + lghi %r7,4096 #load PAGE_SIZE in r7 + lghi %r9,4096 #load PAGE_SIZE in r9 + lg %r5,0(%r2) #read another word for indirection page + aghi %r2,8 #increment pointer + tml %r5,0x1 #is it a destination page? + je .indir_check #NO, goto "indir_check" + lgr %r6,%r5 #r6 = r5 + nill %r6,0xf000 #mask it out and... + j .top #...next iteration + .indir_check: + tml %r5,0x2 #is it a indirection page? + je .done_test #NO, goto "done_test" + nill %r5,0xf000 #YES, mask out, + lgr %r2,%r5 #move it into the right register, + j .top #and read next... 
+ .done_test: + tml %r5,0x4 #is it the done indicator? + je .source_test #NO! Well, then it should be the source indicator... + j .done #ok, lets finish it here... + .source_test: + tml %r5,0x8 #it should be a source indicator... + je .top #NO, ignore it... + lgr %r8,%r5 #r8 = r5 + nill %r8,0xf000 #masking + 0: mvcle %r6,%r8,0x0 #copy PAGE_SIZE bytes from r8 to r6 - pad with 0 + jo 0b + j .top + .done: + sgr %r0,%r0 #clear register r0 + la %r4,load_psw-.base(%r13) #load psw-address into the register + o %r3,4(%r4) #or load address into psw + st %r3,4(%r4) + mvc 0(8,%r0),0(%r4) #copy psw to absolute address 0 + sam31 #31 bit mode + sr %r1,%r1 #erase register r1 + sr %r2,%r2 #erase register r2 + sigp %r1,%r2,0x12 #set cpuid to zero + lpsw 0 #hopefully start new kernel... + + .align 8 + zero64: + .quad 0 + load_psw: + .long 0x00080000,0x80000000 + sys_msk: + .quad 0 + relocate_kernel_end: + .globl relocate_kernel_len + relocate_kernel_len: + .quad relocate_kernel_end - relocate_kernel + diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index df83215beac3..b6d740ac0e6e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -198,11 +198,11 @@ static void __init conmode_default(void) char *ptr; if (MACHINE_IS_VM) { - __cpcmd("QUERY CONSOLE", query_buffer, 1024); + __cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL); console_devno = simple_strtoul(query_buffer + 5, NULL, 16); ptr = strstr(query_buffer, "SUBCHANNEL ="); console_irq = simple_strtoul(ptr + 13, NULL, 16); - __cpcmd("QUERY TERM", query_buffer, 1024); + __cpcmd("QUERY TERM", query_buffer, 1024, NULL); ptr = strstr(query_buffer, "CONMODE"); /* * Set the conmode to 3215 so that the device recognition @@ -211,7 +211,7 @@ static void __init conmode_default(void) * 3215 and the 3270 driver will try to access the console * device (3215 as console and 3270 as normal tty). */ - __cpcmd("TERM CONMODE 3215", NULL, 0); + __cpcmd("TERM CONMODE 3215", NULL, 0, NULL); if (ptr == NULL) { #if defined(CONFIG_SCLP_CONSOLE) SET_CONSOLE_SCLP; @@ -414,7 +414,8 @@ setup_lowcore(void) lc->program_new_psw.mask = PSW_KERNEL_BITS; lc->program_new_psw.addr = PSW_ADDR_AMODE | (unsigned long)pgm_check_handler; - lc->mcck_new_psw.mask = PSW_KERNEL_BITS; + lc->mcck_new_psw.mask = + PSW_KERNEL_BITS & ~PSW_MASK_MCHECK & ~PSW_MASK_DAT; lc->mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) mcck_int_handler; lc->io_new_psw.mask = PSW_KERNEL_BITS; @@ -424,12 +425,18 @@ setup_lowcore(void) lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE; lc->async_stack = (unsigned long) __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE; -#ifdef CONFIG_CHECK_STACK lc->panic_stack = (unsigned long) __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE; -#endif lc->current_task = (unsigned long) init_thread_union.thread_info.task; lc->thread_info = (unsigned long) &init_thread_union; +#ifndef CONFIG_ARCH_S390X + if (MACHINE_HAS_IEEE) { + lc->extended_save_area_addr = (__u32) + __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0); + /* enable extended save area */ + ctl_set_bit(14, 29); + } +#endif #ifdef CONFIG_ARCH_S390X if (MACHINE_HAS_DIAG44) lc->diag44_opcode = 0x83000044; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index fdfcf0488b49..642572a8e334 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -284,7 +284,7 @@ static void do_machine_restart(void * __unused) * locks are always held disabled). 
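A note on the cpcmd()/__cpcmd() call sites converted in the hunks above (setup.c) and below (smp.c, traps.c, extmem.c): the interface gained a trailing out-parameter, apparently for CP's response code, and every caller in this patch that does not care passes NULL. A hedged sketch of the new convention; buffer size and the warning text are illustrative:

#include <linux/kernel.h>
#include <asm/cpcmd.h>

static void example_query_console(void)
{
	char response[1024];
	int rc = 0;

	/* new convention: the trailing pointer receives CP's response code */
	__cpcmd("QUERY CONSOLE", response, sizeof(response), &rc);
	if (rc != 0)
		printk(KERN_WARNING "QUERY CONSOLE: CP response code %d\n", rc);

	/* callers that do not care simply pass NULL, as this patch does */
	__cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
}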
*/ if (MACHINE_IS_VM) - cpcmd ("IPL", NULL, 0); + cpcmd ("IPL", NULL, 0, NULL); else reipl (0x10000 | S390_lowcore.ipl_device); } @@ -313,7 +313,7 @@ static void do_machine_halt(void * __unused) if (atomic_compare_and_swap(-1, smp_processor_id(), &cpuid) == 0) { smp_send_stop(); if (MACHINE_IS_VM && strlen(vmhalt_cmd) > 0) - cpcmd(vmhalt_cmd, NULL, 0); + cpcmd(vmhalt_cmd, NULL, 0, NULL); signal_processor(smp_processor_id(), sigp_stop_and_store_status); } @@ -332,7 +332,7 @@ static void do_machine_power_off(void * __unused) if (atomic_compare_and_swap(-1, smp_processor_id(), &cpuid) == 0) { smp_send_stop(); if (MACHINE_IS_VM && strlen(vmpoff_cmd) > 0) - cpcmd(vmpoff_cmd, NULL, 0); + cpcmd(vmpoff_cmd, NULL, 0, NULL); signal_processor(smp_processor_id(), sigp_stop_and_store_status); } @@ -679,12 +679,14 @@ __cpu_disable(void) { unsigned long flags; ec_creg_mask_parms cr_parms; + int cpu = smp_processor_id(); spin_lock_irqsave(&smp_reserve_lock, flags); - if (smp_cpu_reserved[smp_processor_id()] != 0) { + if (smp_cpu_reserved[cpu] != 0) { spin_unlock_irqrestore(&smp_reserve_lock, flags); return -EBUSY; } + cpu_clear(cpu, cpu_online_map); #ifdef CONFIG_PFAULT /* Disable pfault pseudo page faults on this cpu. */ @@ -771,13 +773,24 @@ void __init smp_prepare_cpus(unsigned int max_cpus) *(lowcore_ptr[i]) = S390_lowcore; lowcore_ptr[i]->async_stack = stack + (ASYNC_SIZE); -#ifdef CONFIG_CHECK_STACK stack = __get_free_pages(GFP_KERNEL,0); if (stack == 0ULL) panic("smp_boot_cpus failed to allocate memory\n"); lowcore_ptr[i]->panic_stack = stack + (PAGE_SIZE); +#ifndef __s390x__ + if (MACHINE_HAS_IEEE) { + lowcore_ptr[i]->extended_save_area_addr = + (__u32) __get_free_pages(GFP_KERNEL,0); + if (lowcore_ptr[i]->extended_save_area_addr == 0) + panic("smp_boot_cpus failed to " + "allocate memory\n"); + } #endif } +#ifndef __s390x__ + if (MACHINE_HAS_IEEE) + ctl_set_bit(14, 29); /* enable extended save area */ +#endif set_prefix((u32)(unsigned long) lowcore_ptr[smp_processor_id()]); for_each_cpu(cpu) diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 515938628f82..a8668afb5f87 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -285,7 +285,7 @@ SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend_wrapper) SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive_wrapper) SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify_wrapper) /* 275 */ SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr_wrapper) -NI_SYSCALL /* reserved for kexec */ +SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load_wrapper) SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key_wrapper) SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key_wrapper) SYSCALL(sys_keyctl,sys_keyctl,compat_sys_keyctl) /* 280 */ diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index ca34b6f34b38..bc7b7be7acbe 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -735,7 +735,7 @@ void __init trap_init(void) &ext_int_pfault); #endif #ifndef CONFIG_ARCH_S390X - cpcmd("SET PAGEX ON", NULL, 0); + cpcmd("SET PAGEX ON", NULL, 0, NULL); #endif } } diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 648deed17e25..c5348108ca3c 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -576,8 +576,8 @@ segment_save(char *name) segtype_string[seg->range[i].start & 0xff]); } sprintf(cmd2, "SAVESEG %s", name); - cpcmd(cmd1, NULL, 0); - cpcmd(cmd2, NULL, 0); + cpcmd(cmd1, NULL, 0, NULL); + cpcmd(cmd2, 
NULL, 0, NULL); spin_unlock(&dcss_lock); } diff --git a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c index cf15b4a8b517..c1b03f7c1daa 100644 --- a/arch/um/drivers/daemon_user.c +++ b/arch/um/drivers/daemon_user.c @@ -157,9 +157,9 @@ static void daemon_remove(void *data) os_close_file(pri->fd); os_close_file(pri->control); - if(pri->data_addr != NULL) kfree(pri->data_addr); - if(pri->ctl_addr != NULL) kfree(pri->ctl_addr); - if(pri->local_addr != NULL) kfree(pri->local_addr); + kfree(pri->data_addr); + kfree(pri->ctl_addr); + kfree(pri->local_addr); } int daemon_user_write(int fd, void *buf, int len, struct daemon_data *pri) diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 0f59736db329..2bb4c4f5dec4 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -602,11 +602,26 @@ int line_get_config(char *name, struct line *lines, unsigned int num, char *str, return n; } -int line_remove(struct line *lines, unsigned int num, char *str) +int line_id(char **str, int *start_out, int *end_out) +{ + char *end; + int n; + + n = simple_strtoul(*str, &end, 0); + if((*end != '\0') || (end == *str)) + return -1; + + *str = end; + *start_out = n; + *end_out = n; + return n; +} + +int line_remove(struct line *lines, unsigned int num, int n) { char config[sizeof("conxxxx=none\0")]; - sprintf(config, "%s=none", str); + sprintf(config, "%d=none", n); return !line_setup(lines, num, config, 0); } diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index d7c7adcc0a67..404de41a4f67 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -419,8 +419,9 @@ void mconsole_config(struct mc_request *req) void mconsole_remove(struct mc_request *req) { struct mc_device *dev; - char *ptr = req->request.data; - int err; + char *ptr = req->request.data, *err_msg = ""; + char error[256]; + int err, start, end, n; ptr += strlen("remove"); while(isspace(*ptr)) ptr++; @@ -429,8 +430,35 @@ void mconsole_remove(struct mc_request *req) mconsole_reply(req, "Bad remove option", 1, 0); return; } - err = (*dev->remove)(&ptr[strlen(dev->name)]); - mconsole_reply(req, "", err, 0); + + ptr = &ptr[strlen(dev->name)]; + + err = 1; + n = (*dev->id)(&ptr, &start, &end); + if(n < 0){ + err_msg = "Couldn't parse device number"; + goto out; + } + else if((n < start) || (n > end)){ + sprintf(error, "Invalid device number - must be between " + "%d and %d", start, end); + err_msg = error; + goto out; + } + + err = (*dev->remove)(n); + switch(err){ + case -ENODEV: + err_msg = "Device doesn't exist"; + break; + case -EBUSY: + err_msg = "Device is currently open"; + break; + default: + break; + } + out: + mconsole_reply(req, err_msg, err, 0); } #ifdef CONFIG_MAGIC_SYSRQ diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 5388a7428691..1495007bf6c0 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -612,25 +612,35 @@ static int net_config(char *str) return(err); } -static int net_remove(char *str) +static int net_id(char **str, int *start_out, int *end_out) +{ + char *end; + int n; + + n = simple_strtoul(*str, &end, 0); + if((*end != '\0') || (end == *str)) + return -1; + + *start_out = n; + *end_out = n; + *str = end; + return n; +} + +static int net_remove(int n) { struct uml_net *device; struct net_device *dev; struct uml_net_private *lp; - char *end; - int n; - - n = simple_strtoul(str, &end, 0); - if((*end != '\0') || (end == str)) - return(-1); device = find_device(n); if(device == NULL) - return(0); 
+ return -ENODEV; dev = device->dev; lp = dev->priv; - if(lp->fd > 0) return(-1); + if(lp->fd > 0) + return -EBUSY; if(lp->remove != NULL) (*lp->remove)(&lp->user); unregister_netdev(dev); platform_device_unregister(&device->pdev); @@ -638,13 +648,14 @@ static int net_remove(char *str) list_del(&device->list); kfree(device); free_netdev(dev); - return(0); + return 0; } static struct mc_device net_mc = { .name = "eth", .config = net_config, .get_config = NULL, + .id = net_id, .remove = net_remove, }; diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c index b32a77010fbe..62e04ecfada8 100644 --- a/arch/um/drivers/ssl.c +++ b/arch/um/drivers/ssl.c @@ -49,7 +49,7 @@ static struct chan_opts opts = { static int ssl_config(char *str); static int ssl_get_config(char *dev, char *str, int size, char **error_out); -static int ssl_remove(char *str); +static int ssl_remove(int n); static struct line_driver driver = { .name = "UML serial line", @@ -69,6 +69,7 @@ static struct line_driver driver = { .name = "ssl", .config = ssl_config, .get_config = ssl_get_config, + .id = line_id, .remove = ssl_remove, }, }; @@ -94,10 +95,10 @@ static int ssl_get_config(char *dev, char *str, int size, char **error_out) str, size, error_out)); } -static int ssl_remove(char *str) +static int ssl_remove(int n) { - return(line_remove(serial_lines, - sizeof(serial_lines)/sizeof(serial_lines[0]), str)); + return line_remove(serial_lines, + sizeof(serial_lines)/sizeof(serial_lines[0]), n); } int ssl_open(struct tty_struct *tty, struct file *filp) diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c index afbe1e71ed83..005aa6333b6e 100644 --- a/arch/um/drivers/stdio_console.c +++ b/arch/um/drivers/stdio_console.c @@ -55,7 +55,7 @@ static struct chan_opts opts = { static int con_config(char *str); static int con_get_config(char *dev, char *str, int size, char **error_out); -static int con_remove(char *str); +static int con_remove(int n); static struct line_driver driver = { .name = "UML console", @@ -75,6 +75,7 @@ static struct line_driver driver = { .name = "con", .config = con_config, .get_config = con_get_config, + .id = line_id, .remove = con_remove, }, }; @@ -99,9 +100,9 @@ static int con_get_config(char *dev, char *str, int size, char **error_out) size, error_out)); } -static int con_remove(char *str) +static int con_remove(int n) { - return(line_remove(vts, sizeof(vts)/sizeof(vts[0]), str)); + return line_remove(vts, sizeof(vts)/sizeof(vts[0]), n); } static int con_open(struct tty_struct *tty, struct file *filp) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 2a7f6892c55c..344b24d09a7c 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -754,24 +754,34 @@ static int ubd_get_config(char *name, char *str, int size, char **error_out) return(len); } -static int ubd_remove(char *str) +static int ubd_id(char **str, int *start_out, int *end_out) +{ + int n; + + n = parse_unit(str); + *start_out = 0; + *end_out = MAX_DEV - 1; + return n; +} + +static int ubd_remove(int n) { struct ubd *dev; - int n, err = -ENODEV; + int err = -ENODEV; - n = parse_unit(&str); + spin_lock(&ubd_lock); - if((n < 0) || (n >= MAX_DEV)) - return(err); + if(ubd_gendisk[n] == NULL) + goto out; dev = &ubd_dev[n]; - if(dev->count > 0) - return(-EBUSY); /* you cannot remove a open disk */ - err = 0; - spin_lock(&ubd_lock); + if(dev->file == NULL) + goto out; - if(ubd_gendisk[n] == NULL) + /* you cannot remove a open disk */ + err = -EBUSY; + if(dev->count > 0) goto out; 
del_gendisk(ubd_gendisk[n]); @@ -787,15 +797,16 @@ static int ubd_remove(char *str) platform_device_unregister(&dev->pdev); *dev = ((struct ubd) DEFAULT_UBD); err = 0; - out: - spin_unlock(&ubd_lock); - return(err); +out: + spin_unlock(&ubd_lock); + return err; } static struct mc_device ubd_mc = { .name = "ubd", .config = ubd_config, .get_config = ubd_get_config, + .id = ubd_id, .remove = ubd_remove, }; diff --git a/arch/um/include/line.h b/arch/um/include/line.h index 4c5e92c04ccb..5323d22a6ca7 100644 --- a/arch/um/include/line.h +++ b/arch/um/include/line.h @@ -101,7 +101,8 @@ extern void lines_init(struct line *lines, int nlines); extern void close_lines(struct line *lines, int nlines); extern int line_config(struct line *lines, unsigned int sizeof_lines, char *str); -extern int line_remove(struct line *lines, unsigned int sizeof_lines, char *str); +extern int line_id(char **str, int *start_out, int *end_out); +extern int line_remove(struct line *lines, unsigned int sizeof_lines, int n); extern int line_get_config(char *dev, struct line *lines, unsigned int sizeof_lines, char *str, int size, char **error_out); diff --git a/arch/um/include/mconsole_kern.h b/arch/um/include/mconsole_kern.h index 61c274fcee5d..d86ee14260ce 100644 --- a/arch/um/include/mconsole_kern.h +++ b/arch/um/include/mconsole_kern.h @@ -20,7 +20,8 @@ struct mc_device { char *name; int (*config)(char *); int (*get_config)(char *, char *, int, char **); - int (*remove)(char *); + int (*id)(char **, int *, int *); + int (*remove)(int); }; #define CONFIG_CHUNK(str, size, current, chunk, end) \ diff --git a/arch/um/include/time_user.h b/arch/um/include/time_user.h index 6793a2fcd0ae..f64ef77019a3 100644 --- a/arch/um/include/time_user.h +++ b/arch/um/include/time_user.h @@ -8,11 +8,11 @@ extern void timer(void); extern void switch_timers(int to_real); -extern void set_interval(int timer_type); extern void idle_sleep(int secs); extern void enable_timer(void); extern void disable_timer(void); extern unsigned long time_lock(void); extern void time_unlock(unsigned long); +extern void user_time_init(void); #endif diff --git a/arch/um/kernel/main.c b/arch/um/kernel/main.c index e59f58152678..1e1a87f1c510 100644 --- a/arch/um/kernel/main.c +++ b/arch/um/kernel/main.c @@ -69,7 +69,6 @@ static __init void do_uml_initcalls(void) static void last_ditch_exit(int sig) { - kmalloc_ok = 0; signal(SIGINT, SIG_DFL); signal(SIGTERM, SIG_DFL); signal(SIGHUP, SIG_DFL); diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index 157584ae4792..d4036ed680bc 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -96,8 +96,8 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) current->thread.request.u.thread.proc = fn; current->thread.request.u.thread.arg = arg; - pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0, NULL, 0, NULL, - NULL); + pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0, + ¤t->thread.regs, 0, NULL, NULL); if(pid < 0) panic("do_fork failed in kernel_thread, errno = %d", pid); return(pid); @@ -169,7 +169,7 @@ int current_pid(void) void default_idle(void) { - uml_idle_timer(); + CHOOSE_MODE(uml_idle_timer(), (void) 0); atomic_inc(&init_mm.mm_count); current->mm = &init_mm; diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 207f89d74908..fcec51da1d37 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -38,14 +38,14 @@ static void kill_off_processes(void) void uml_cleanup(void) { - kill_off_processes(); + kmalloc_ok = 0; 
do_uml_exitcalls(); + kill_off_processes(); } void machine_restart(char * __unused) { - do_uml_exitcalls(); - kill_off_processes(); + uml_cleanup(); CHOOSE_MODE(reboot_tt(), reboot_skas()); } @@ -53,8 +53,7 @@ EXPORT_SYMBOL(machine_restart); void machine_power_off(void) { - do_uml_exitcalls(); - kill_off_processes(); + uml_cleanup(); CHOOSE_MODE(halt_tt(), halt_skas()); } diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile index d37d1bfcd6f7..ff69c4b312c0 100644 --- a/arch/um/kernel/skas/Makefile +++ b/arch/um/kernel/skas/Makefile @@ -4,10 +4,10 @@ # obj-y := exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \ - syscall_kern.o syscall_user.o time.o tlb.o trap_user.o uaccess.o \ + syscall_kern.o syscall_user.o tlb.o trap_user.o uaccess.o \ subdir- := util -USER_OBJS := process.o time.o +USER_OBJS := process.o include arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/skas/include/mode-skas.h b/arch/um/kernel/skas/include/mode-skas.h index c1e33bd788db..bcd26a6a3888 100644 --- a/arch/um/kernel/skas/include/mode-skas.h +++ b/arch/um/kernel/skas/include/mode-skas.h @@ -13,7 +13,6 @@ extern unsigned long exec_fp_regs[]; extern unsigned long exec_fpx_regs[]; extern int have_fpx_regs; -extern void user_time_init_skas(void); extern void sig_handler_common_skas(int sig, void *sc_ptr); extern void halt_skas(void); extern void reboot_skas(void); diff --git a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c index fc71ef295782..0a7b8aa55db8 100644 --- a/arch/um/kernel/skas/process_kern.c +++ b/arch/um/kernel/skas/process_kern.c @@ -111,8 +111,7 @@ int copy_thread_skas(int nr, unsigned long clone_flags, unsigned long sp, void (*handler)(int); if(current->thread.forking){ - memcpy(&p->thread.regs.regs.skas, - ¤t->thread.regs.regs.skas, + memcpy(&p->thread.regs.regs.skas, ®s->regs.skas, sizeof(p->thread.regs.regs.skas)); REGS_SET_SYSCALL_RETURN(p->thread.regs.regs.skas.regs, 0); if(sp != 0) REGS_SP(p->thread.regs.regs.skas.regs) = sp; @@ -181,7 +180,6 @@ int start_uml_skas(void) start_userspace(0); init_new_thread_signals(1); - uml_idle_timer(); init_task.thread.request.u.thread.proc = start_kernel_proc; init_task.thread.request.u.thread.arg = NULL; @@ -201,14 +199,3 @@ int thread_pid_skas(struct task_struct *task) #warning Need to look up userspace_pid by cpu return(userspace_pid[0]); } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/skas/time.c b/arch/um/kernel/skas/time.c deleted file mode 100644 index 98091494b897..000000000000 --- a/arch/um/kernel/skas/time.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include <sys/signal.h> -#include <sys/time.h> -#include "time_user.h" -#include "process.h" -#include "user.h" - -void user_time_init_skas(void) -{ - if(signal(SIGALRM, (__sighandler_t) alarm_handler) == SIG_ERR) - panic("Couldn't set SIGALRM handler"); - if(signal(SIGVTALRM, (__sighandler_t) alarm_handler) == SIG_ERR) - panic("Couldn't set SIGVTALRM handler"); - set_interval(ITIMER_VIRTUAL); -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. 
- * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/syscall_kern.c b/arch/um/kernel/syscall_kern.c index b7a55251e897..8e1a3501ff46 100644 --- a/arch/um/kernel/syscall_kern.c +++ b/arch/um/kernel/syscall_kern.c @@ -31,7 +31,8 @@ long sys_fork(void) long ret; current->thread.forking = 1; - ret = do_fork(SIGCHLD, 0, NULL, 0, NULL, NULL); + ret = do_fork(SIGCHLD, UPT_SP(¤t->thread.regs.regs), + ¤t->thread.regs, 0, NULL, NULL); current->thread.forking = 0; return(ret); } @@ -41,8 +42,9 @@ long sys_vfork(void) long ret; current->thread.forking = 1; - ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0, NULL, - NULL); + ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, + UPT_SP(¤t->thread.regs.regs), + ¤t->thread.regs, 0, NULL, NULL); current->thread.forking = 0; return(ret); } @@ -162,14 +164,3 @@ int next_syscall_index(int limit) spin_unlock(&syscall_lock); return(ret); } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index c40c86a3f918..f829b309b63c 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -33,7 +33,7 @@ void timer(void) timeradd(&xtime, &local_offset, &xtime); } -void set_interval(int timer_type) +static void set_interval(int timer_type) { int usec = 1000000/hz(); struct itimerval interval = ((struct itimerval) { { 0, usec }, @@ -45,12 +45,7 @@ void set_interval(int timer_type) void enable_timer(void) { - int usec = 1000000/hz(); - struct itimerval enable = ((struct itimerval) { { 0, usec }, - { 0, usec }}); - if(setitimer(ITIMER_VIRTUAL, &enable, NULL)) - printk("enable_timer - setitimer failed, errno = %d\n", - errno); + set_interval(ITIMER_VIRTUAL); } void disable_timer(void) @@ -155,13 +150,15 @@ void idle_sleep(int secs) nanosleep(&ts, NULL); } -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +/* XXX This partly duplicates init_irq_signals */ + +void user_time_init(void) +{ + set_handler(SIGVTALRM, (__sighandler_t) alarm_handler, + SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, + SIGALRM, SIGUSR2, -1); + set_handler(SIGALRM, (__sighandler_t) alarm_handler, + SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, + SIGVTALRM, SIGUSR2, -1); + set_interval(ITIMER_VIRTUAL); +} diff --git a/arch/um/kernel/time_kern.c b/arch/um/kernel/time_kern.c index 6516fc52afe0..a8b4ef601f59 100644 --- a/arch/um/kernel/time_kern.c +++ b/arch/um/kernel/time_kern.c @@ -162,7 +162,7 @@ int __init timer_init(void) { int err; - CHOOSE_MODE(user_time_init_tt(), user_time_init_skas()); + user_time_init(); err = request_irq(TIMER_IRQ, um_timer, SA_INTERRUPT, "timer", NULL); if(err != 0) printk(KERN_ERR "timer_init : request_irq failed - " diff --git a/arch/um/kernel/tt/Makefile b/arch/um/kernel/tt/Makefile index 3fd2554e60b6..6939e5af8472 100644 --- a/arch/um/kernel/tt/Makefile +++ b/arch/um/kernel/tt/Makefile @@ -4,11 +4,11 @@ # obj-y = exec_kern.o exec_user.o gdb.o ksyms.o mem.o mem_user.o process_kern.o \ - syscall_kern.o syscall_user.o time.o tlb.o tracer.o trap_user.o \ + syscall_kern.o syscall_user.o tlb.o tracer.o trap_user.o \ uaccess.o uaccess_user.o obj-$(CONFIG_PT_PROXY) += gdb_kern.o ptproxy/ -USER_OBJS := gdb.o time.o tracer.o +USER_OBJS := gdb.o tracer.o include arch/um/scripts/Makefile.rules diff --git a/arch/um/kernel/tt/gdb.c b/arch/um/kernel/tt/gdb.c index 19a0ad7b35b3..37e22d71a0d9 100644 --- a/arch/um/kernel/tt/gdb.c +++ b/arch/um/kernel/tt/gdb.c @@ -153,10 +153,10 @@ void remove_gdb_cb(void *unused) exit_debugger_cb(NULL); } -int gdb_remove(char *unused) +int gdb_remove(int unused) { initial_thread_cb(remove_gdb_cb, NULL); - return(0); + return 0; } void signal_usr1(int sig) diff --git a/arch/um/kernel/tt/gdb_kern.c b/arch/um/kernel/tt/gdb_kern.c index 93fb121f86af..26506388a6aa 100644 --- a/arch/um/kernel/tt/gdb_kern.c +++ b/arch/um/kernel/tt/gdb_kern.c @@ -10,7 +10,7 @@ #ifdef CONFIG_MCONSOLE extern int gdb_config(char *str); -extern int gdb_remove(char *unused); +extern int gdb_remove(int n); static struct mc_device gdb_mc = { .name = "gdb", diff --git a/arch/um/kernel/tt/include/debug.h b/arch/um/kernel/tt/include/debug.h index 8eff674107ca..738435461e13 100644 --- a/arch/um/kernel/tt/include/debug.h +++ b/arch/um/kernel/tt/include/debug.h @@ -4,8 +4,8 @@ * Licensed under the GPL */ -#ifndef __DEBUG_H -#define __DEBUG_H +#ifndef __UML_TT_DEBUG_H +#define __UML_TT_DEBUG_H extern int debugger_proxy(int status, pid_t pid); extern void child_proxy(pid_t pid, int status); @@ -13,17 +13,6 @@ extern void init_proxy (pid_t pid, int waiting, int status); extern int start_debugger(char *prog, int startup, int stop, int *debugger_fd); extern void fake_child_exit(void); extern int gdb_config(char *str); -extern int gdb_remove(char *unused); +extern int gdb_remove(int unused); #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/tt/include/mode-tt.h b/arch/um/kernel/tt/include/mode-tt.h index efe462019069..e171e15fead5 100644 --- a/arch/um/kernel/tt/include/mode-tt.h +++ b/arch/um/kernel/tt/include/mode-tt.h @@ -13,7 +13,6 @@ enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB }; extern int tracing_pid; extern int tracer(int (*init_proc)(void *), void *sp); -extern void user_time_init_tt(void); extern void sig_handler_common_tt(int sig, void *sc); extern void syscall_handler_tt(int sig, union uml_pt_regs *regs); extern void reboot_tt(void); diff --git a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c index 776310fd5b8b..a189a2b92935 100644 --- a/arch/um/kernel/tt/process_kern.c +++ b/arch/um/kernel/tt/process_kern.c @@ -266,10 +266,10 @@ int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp, } if(current->thread.forking){ - sc_to_sc(UPT_SC(&p->thread.regs.regs), - UPT_SC(¤t->thread.regs.regs)); + sc_to_sc(UPT_SC(&p->thread.regs.regs), UPT_SC(®s->regs)); SC_SET_SYSCALL_RETURN(UPT_SC(&p->thread.regs.regs), 0); - if(sp != 0) SC_SP(UPT_SC(&p->thread.regs.regs)) = sp; + if(sp != 0) + SC_SP(UPT_SC(&p->thread.regs.regs)) = sp; } p->thread.mode.tt.extern_pid = new_pid; @@ -459,14 +459,3 @@ int is_valid_pid(int pid) read_unlock(&tasklist_lock); return(0); } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/tt/time.c b/arch/um/kernel/tt/time.c deleted file mode 100644 index 8565b71b07cd..000000000000 --- a/arch/um/kernel/tt/time.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include <signal.h> -#include <sys/time.h> -#include <time_user.h> -#include "process.h" -#include "user.h" - -void user_time_init_tt(void) -{ - if(signal(SIGVTALRM, (__sighandler_t) alarm_handler) == SIG_ERR) - panic("Couldn't set SIGVTALRM handler"); - set_interval(ITIMER_VIRTUAL); -} - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c index 03913ca5d256..4efc69a039d7 100644 --- a/arch/um/sys-i386/signal.c +++ b/arch/um/sys-i386/signal.c @@ -312,7 +312,7 @@ long sys_sigreturn(struct pt_regs regs) unsigned long __user *extramask = frame->extramask; int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); - if(copy_from_user(&set.sig[0], oldmask, sizeof(&set.sig[0])) || + if(copy_from_user(&set.sig[0], oldmask, sizeof(set.sig[0])) || copy_from_user(&set.sig[1], extramask, sig_size)) goto segfault; diff --git a/arch/um/sys-i386/syscalls.c b/arch/um/sys-i386/syscalls.c index 335e2d89504d..83e9be820a86 100644 --- a/arch/um/sys-i386/syscalls.c +++ b/arch/um/sys-i386/syscalls.c @@ -69,15 +69,11 @@ long sys_clone(unsigned long clone_flags, unsigned long newsp, { long ret; - /* XXX: normal arch do here this pass, and also pass the regs to - * do_fork, instead of NULL. Currently the arch-independent code - * ignores these values, while the UML code (actually it's - * copy_thread) does the right thing. But this should change, - probably. */ - /*if (!newsp) - newsp = UPT_SP(current->thread.regs);*/ + if (!newsp) + newsp = UPT_SP(¤t->thread.regs.regs); current->thread.forking = 1; - ret = do_fork(clone_flags, newsp, NULL, 0, parent_tid, child_tid); + ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, + child_tid); current->thread.forking = 0; return(ret); } @@ -197,14 +193,3 @@ long sys_sigaction(int sig, const struct old_sigaction __user *act, return ret; } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/sys-x86_64/syscalls.c b/arch/um/sys-x86_64/syscalls.c index 6f44f40204ed..3259a4db4534 100644 --- a/arch/um/sys-x86_64/syscalls.c +++ b/arch/um/sys-x86_64/syscalls.c @@ -174,26 +174,11 @@ long sys_clone(unsigned long clone_flags, unsigned long newsp, { long ret; - /* XXX: normal arch do here this pass, and also pass the regs to - * do_fork, instead of NULL. Currently the arch-independent code - * ignores these values, while the UML code (actually it's - * copy_thread) does the right thing. But this should change, - probably. */ - /*if (!newsp) - newsp = UPT_SP(current->thread.regs);*/ + if (!newsp) + newsp = UPT_SP(¤t->thread.regs.regs); current->thread.forking = 1; - ret = do_fork(clone_flags, newsp, NULL, 0, parent_tid, child_tid); + ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, + child_tid); current->thread.forking = 0; return(ret); } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index db259757dc8a..d09437b5c48f 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -207,33 +207,6 @@ config SMP If you don't know what to do here, say N. 
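A small aside on the sys_sigreturn hunk above: sizeof(&set.sig[0]) measures a pointer while sizeof(set.sig[0]) measures the sigset word actually being copied, so the old code was correct only by accident, because unsigned long and a pointer happen to be the same size on this architecture. A standalone user-space illustration of the difference (demonstration only, not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned long word = 0;

	/* the buggy expression measured the pointer, the fix measures the object */
	printf("sizeof(&word) = %zu, sizeof(word) = %zu\n",
	       sizeof(&word), sizeof(word));
	return 0;
}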
-config PREEMPT - bool "Preemptible Kernel" - ---help--- - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is - under load. On contrary it may also break your drivers and add - priority inheritance problems to your system. Don't select it if - you rely on a stable system or have slightly obscure hardware. - It's also not very well tested on x86-64 currently. - You have been warned. - - Say Y here if you are feeling brave and building a kernel for a - desktop, embedded or real-time system. Say N if you are unsure. - -config PREEMPT_BKL - bool "Preempt The Big Kernel Lock" - depends on PREEMPT - default y - help - This option reduces the latency of the kernel by making the - big kernel lock preemptible. - - Say Y here if you are building a kernel for a desktop system. - Say N if you are unsure. - config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" depends on SMP @@ -244,6 +217,8 @@ config SCHED_SMT cost of slightly increased overhead in some places. If unsure say N here. +source "kernel/Kconfig.preempt" + config K8_NUMA bool "K8 NUMA support" select NUMA @@ -313,6 +288,15 @@ config NR_CPUS This is purely to save memory - each supported CPU requires memory in the static kernel configuration. +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" + depends on SMP && HOTPLUG && EXPERIMENTAL + help + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu/cpu#. + Say N if you want to disable CPU hotplug. + + config HPET_TIMER bool default y @@ -385,6 +369,34 @@ config X86_MCE_INTEL Additional support for intel specific MCE features such as the thermal monitor. +config PHYSICAL_START + hex "Physical address where the kernel is loaded" if EMBEDDED + default "0x100000" + help + This gives the physical address where the kernel is loaded. + Primarily used in the case of kexec on panic where the + fail safe kernel needs to run at a different address than + the panic-ed kernel. + + Don't change this unless you know what you are doing. + +config KEXEC + bool "kexec system call (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is indepedent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similiarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shutdown, so do not be surprised if this code does not + initially work for you. It may help to enable device hotplugging + support. As of this writing the exact hardware interface is + strongly in flux, so no good recommendation can be made. 
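The KEXEC option above is driven from user space through the sys_kexec_load entry points wired up elsewhere in this patch (the s390 syscall table above and the ia32 compat table below). Glibc has no wrapper for it at this point, so a loader such as kexec-tools issues the raw syscall; a minimal sketch of the calling convention, with the segment setup omitted:

#include <unistd.h>
#include <sys/syscall.h>

/* ABI of the new call:
 *   long kexec_load(unsigned long entry, unsigned long nr_segments,
 *                   struct kexec_segment *segments, unsigned long flags);
 */
static long load_new_kernel(unsigned long entry, unsigned long nr_segments,
			    void *segments, unsigned long flags)
{
	return syscall(__NR_kexec_load, entry, nr_segments, segments, flags);
}

Building the segment array from the new kernel image is the loader's job and is not shown here.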
+ config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index 6f90c246c418..8a73794f9b90 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -35,7 +35,7 @@ export IA32_CC IA32_LD IA32_AS IA32_OBJCOPY IA32_CPP LDFLAGS := -m elf_x86_64 OBJCOPYFLAGS := -O binary -R .note -R .comment -S -LDFLAGS_vmlinux := -e stext +LDFLAGS_vmlinux := CHECKFLAGS += -D__x86_64__ -m64 diff --git a/arch/x86_64/boot/compressed/head.S b/arch/x86_64/boot/compressed/head.S index 27264dbd575c..6f55565e4d42 100644 --- a/arch/x86_64/boot/compressed/head.S +++ b/arch/x86_64/boot/compressed/head.S @@ -2,8 +2,6 @@ * linux/boot/head.S * * Copyright (C) 1991, 1992, 1993 Linus Torvalds - * - * $Id: head.S,v 1.3 2001/04/20 00:59:28 ak Exp $ */ /* @@ -21,13 +19,14 @@ */ /* - * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 + * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 */ .code32 .text #include <linux/linkage.h> #include <asm/segment.h> +#include <asm/page.h> .code32 .globl startup_32 @@ -77,7 +76,7 @@ startup_32: jnz 3f addl $8,%esp xorl %ebx,%ebx - ljmp $(__KERNEL_CS), $0x100000 + ljmp $(__KERNEL_CS), $__PHYSICAL_START /* * We come here, if we were loaded high. @@ -103,7 +102,7 @@ startup_32: popl %ecx # lcount popl %edx # high_buffer_start popl %eax # hcount - movl $0x100000,%edi + movl $__PHYSICAL_START,%edi cli # make sure we don't get interrupted ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine @@ -128,7 +127,7 @@ move_routine_start: movsl movl %ebx,%esi # Restore setup pointer xorl %ebx,%ebx - ljmp $(__KERNEL_CS), $0x100000 + ljmp $(__KERNEL_CS), $__PHYSICAL_START move_routine_end: diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c index c8b9216f9e63..b38d5b8b5fb8 100644 --- a/arch/x86_64/boot/compressed/misc.c +++ b/arch/x86_64/boot/compressed/misc.c @@ -11,6 +11,7 @@ #include "miscsetup.h" #include <asm/io.h> +#include <asm/page.h> /* * gzip declarations @@ -92,8 +93,11 @@ static unsigned long output_ptr = 0; static void *malloc(int size); static void free(void *where); +void* memset(void* s, int c, unsigned n); +void* memcpy(void* dest, const void* src, unsigned n); + static void putstr(const char *); - + extern int end; static long free_mem_ptr = (long)&end; static long free_mem_end_ptr; @@ -284,7 +288,7 @@ void setup_normal_output_buffer(void) #else if ((ALT_MEM_K > EXT_MEM_K ? 
ALT_MEM_K : EXT_MEM_K) < 1024) error("Less than 2MB of memory"); #endif - output_data = (char *)0x100000; /* Points to 1M */ + output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */ free_mem_end_ptr = (long)real_mode; } @@ -307,8 +311,8 @@ void setup_output_buffer_if_we_run_high(struct moveparams *mv) low_buffer_size = low_buffer_end - LOW_BUFFER_START; high_loaded = 1; free_mem_end_ptr = (long)high_buffer_start; - if ( (0x100000 + low_buffer_size) > ((ulg)high_buffer_start)) { - high_buffer_start = (uch *)(0x100000 + low_buffer_size); + if ( (__PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) { + high_buffer_start = (uch *)(__PHYSICAL_START + low_buffer_size); mv->hcount = 0; /* say: we need not to move high_buffer */ } else mv->hcount = -1; diff --git a/arch/x86_64/boot/install.sh b/arch/x86_64/boot/install.sh index f17b40dfc0f4..198af15a7758 100644 --- a/arch/x86_64/boot/install.sh +++ b/arch/x86_64/boot/install.sh @@ -1,6 +1,6 @@ #!/bin/sh # -# arch/i386/boot/install.sh +# arch/x86_64/boot/install.sh # # This file is subject to the terms and conditions of the GNU General Public # License. See the file "COPYING" in the main directory of this archive diff --git a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S index 75d4d2ad93b3..ff58b2832b75 100644 --- a/arch/x86_64/boot/setup.S +++ b/arch/x86_64/boot/setup.S @@ -33,7 +33,7 @@ * Transcribed from Intel (as86) -> AT&T (gas) by Chris Noe, May 1999. * <stiker@northlink.com> * - * Fix to work around buggy BIOSes which dont use carry bit correctly + * Fix to work around buggy BIOSes which don't use carry bit correctly * and/or report extended memory in CX/DX for e801h memory size detection * call. As a result the kernel got wrong figures. The int15/e801h docs * from Ralf Brown interrupt list seem to indicate AX/BX should be used @@ -383,7 +383,7 @@ sse_ok: # a whole bunch of different types, and allows memory holes and # everything. We scan through this memory map and build a list # of the first 32 memory areas, which we return at [E820MAP]. -# This is documented at http://www.teleport.com/~acpi/acpihtml/topic245.htm +# This is documented at http://www.acpi.info/, in the ACPI 2.0 specification. #define SMAP 0x534d4150 @@ -436,7 +436,7 @@ bail820: meme801: stc # fix to work around buggy - xorw %cx,%cx # BIOSes which dont clear/set + xorw %cx,%cx # BIOSes which don't clear/set xorw %dx,%dx # carry on pass/error of # e801h memory size call # or merely pass cx,dx though @@ -733,7 +733,7 @@ flush_instr: # # but we yet haven't reloaded the CS register, so the default size # of the target offset still is 16 bit. -# However, using an operant prefix (0x66), the CPU will properly +# However, using an operand prefix (0x66), the CPU will properly # take our 48 bit far pointer. (INTeL 80386 Programmer's Reference # Manual, Mixing 16-bit and 32-bit code, page 16-6) diff --git a/arch/x86_64/boot/tools/build.c b/arch/x86_64/boot/tools/build.c index c2fa66313170..18b5bac1c428 100644 --- a/arch/x86_64/boot/tools/build.c +++ b/arch/x86_64/boot/tools/build.c @@ -1,6 +1,4 @@ /* - * $Id: build.c,v 1.3 2001/06/26 15:14:50 pavel Exp $ - * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1997 Martin Mares */ @@ -8,7 +6,8 @@ /* * This file builds a disk-image from three different files: * - * - bootsect: exactly 512 bytes of 8086 machine code, loads the rest + * - bootsect: compatibility mbr which prints an error message if + * someone tries to boot the kernel directly. 
* - setup: 8086 machine code, sets up system parm * - system: 80386 code for actual system * diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index f3ca0db85b5b..cc935427d532 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -589,7 +589,7 @@ ia32_sys_call_table: .quad compat_sys_mq_timedreceive /* 280 */ .quad compat_sys_mq_notify .quad compat_sys_mq_getsetattr - .quad quiet_ni_syscall /* reserved for kexec */ + .quad compat_sys_kexec_load /* reserved for kexec */ .quad compat_sys_waitid .quad quiet_ni_syscall /* sys_altroot */ .quad sys_add_key diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 5ca4a4598fda..48f9e2c19cd6 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o \ genapic.o genapic_cluster.o genapic_flat.o +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o obj-$(CONFIG_PM) += suspend.o obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o obj-$(CONFIG_CPU_FREQ) += cpufreq/ diff --git a/arch/x86_64/kernel/acpi/wakeup.S b/arch/x86_64/kernel/acpi/wakeup.S index a4c630034cd4..185faa911db5 100644 --- a/arch/x86_64/kernel/acpi/wakeup.S +++ b/arch/x86_64/kernel/acpi/wakeup.S @@ -67,7 +67,7 @@ wakeup_code: shll $4, %eax addl $(gdta - wakeup_code), %eax movl %eax, gdt_48a +2 - wakeup_code - lgdt %ds:gdt_48a - wakeup_code # load gdt with whatever is + lgdtl %ds:gdt_48a - wakeup_code # load gdt with whatever is # appropriate movl $1, %eax # protected mode (PE) bit diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index f8e6cc4fecd4..375d369570ca 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -133,7 +133,7 @@ void __init connect_bsp_APIC(void) } } -void disconnect_bsp_APIC(void) +void disconnect_bsp_APIC(int virt_wire_setup) { if (pic_mode) { /* @@ -146,6 +146,42 @@ void disconnect_bsp_APIC(void) outb(0x70, 0x22); outb(0x00, 0x23); } + else { + /* Go back to Virtual Wire compatibility mode */ + unsigned long value; + + /* For the spurious interrupt use vector F, and enable it */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + value |= 0xf; + apic_write_around(APIC_SPIV, value); + + if (!virt_wire_setup) { + /* For LVT0 make it edge triggered, active high, external and enabled */ + value = apic_read(APIC_LVT0); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); + apic_write_around(APIC_LVT0, value); + } + else { + /* Disable LVT0 */ + apic_write_around(APIC_LVT0, APIC_LVT_MASKED); + } + + /* For LVT1 make it edge triggered, active high, nmi and enabled */ + value = apic_read(APIC_LVT1); + value &= ~( + APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); + apic_write_around(APIC_LVT1, value); + } } void disable_local_APIC(void) @@ -285,7 +321,7 @@ void __init init_bsp_APIC(void) apic_write_around(APIC_LVT1, value); } -void __init setup_local_APIC (void) +void __cpuinit setup_local_APIC (void) { unsigned int value, ver, maxlvt; @@ -534,7 +570,7 @@ 
static struct sys_device device_lapic = { .cls = &lapic_sysclass, }; -static void __init apic_pm_activate(void) +static void __cpuinit apic_pm_activate(void) { apic_pm_state.active = 1; } @@ -774,14 +810,14 @@ void __init setup_boot_APIC_clock (void) local_irq_enable(); } -void __init setup_secondary_APIC_clock(void) +void __cpuinit setup_secondary_APIC_clock(void) { local_irq_disable(); /* FIXME: Do we need this? --RR */ setup_APIC_timer(calibration_result); local_irq_enable(); } -void __init disable_APIC_timer(void) +void __cpuinit disable_APIC_timer(void) { if (using_apic_timer) { unsigned long v; diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c new file mode 100644 index 000000000000..d7fa4248501c --- /dev/null +++ b/arch/x86_64/kernel/crash.c @@ -0,0 +1,35 @@ +/* + * Architecture specific (x86_64) functions for kexec based crash dumps. + * + * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) + * + * Copyright (C) IBM Corporation, 2004. All rights reserved. + * + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/irq.h> +#include <linux/reboot.h> +#include <linux/kexec.h> + +#include <asm/processor.h> +#include <asm/hardirq.h> +#include <asm/nmi.h> +#include <asm/hw_irq.h> + +note_buf_t crash_notes[NR_CPUS]; + +void machine_crash_shutdown(struct pt_regs *regs) +{ + /* This function is only called after the system + * has paniced or is otherwise in a critical state. + * The minimum amount of code to allow a kexec'd kernel + * to run successfully needs to happen here. + * + * In practice this means shooting down the other cpus in + * an SMP system. + */ +} diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 7c154dfff64a..6ded3a50dfe6 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -16,6 +16,7 @@ #include <linux/bootmem.h> #include <linux/ioport.h> #include <linux/string.h> +#include <linux/kexec.h> #include <asm/page.h> #include <asm/e820.h> #include <asm/proto.h> @@ -191,8 +192,6 @@ void __init e820_reserve_resources(void) int i; for (i = 0; i < e820.nr_map; i++) { struct resource *res; - if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) - continue; res = alloc_bootmem_low(sizeof(struct resource)); switch (e820.map[i].type) { case E820_RAM: res->name = "System RAM"; break; @@ -212,6 +211,9 @@ void __init e820_reserve_resources(void) */ request_resource(res, &code_resource); request_resource(res, &data_resource); +#ifdef CONFIG_KEXEC + request_resource(res, &crashk_res); +#endif } } } diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c index b4cbbad04226..282846965080 100644 --- a/arch/x86_64/kernel/genapic_flat.c +++ b/arch/x86_64/kernel/genapic_flat.c @@ -7,6 +7,8 @@ * Hacked for x86-64 by James Cleverdon from i386 architecture code by * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and * James Cleverdon. + * Ashok Raj <ashok.raj@intel.com> + * Removed IPI broadcast shortcut to support CPU hotplug */ #include <linux/config.h> #include <linux/threads.h> @@ -18,6 +20,46 @@ #include <asm/smp.h> #include <asm/ipi.h> +/* + * The following permit choosing broadcast IPI shortcut v.s sending IPI only + * to online cpus via the send_IPI_mask varient. + * The mask version is my preferred option, since it eliminates a lot of + * other extra code that would need to be written to cleanup intrs sent + * to a CPU while offline. + * + * Sending broadcast introduces lots of trouble in CPU hotplug situations. 
+ * These IPI's are delivered to cpu's irrespective of their offline status + * and could pickup stale intr data when these CPUS are turned online. + * + * Not using broadcast is a cleaner approach IMO, but Andi Kleen disagrees with + * the idea of not using broadcast IPI's anymore. Hence the run time check + * is introduced, on his request so we can choose an alternate mechanism. + * + * Initial wacky performance tests that collect cycle counts show + * no increase in using mask v.s broadcast version. In fact they seem + * identical in terms of cycle counts. + * + * if we need to use broadcast, we need to do the following. + * + * cli; + * hold call_lock; + * clear any pending IPI, just ack and clear all pending intr + * set cpu_online_map; + * release call_lock; + * sti; + * + * The complicated dummy irq processing shown above is not required if + * we didnt sent IPI's to wrong CPU's in the first place. + * + * - Ashok Raj <ashok.raj@intel.com> + */ +#ifdef CONFIG_HOTPLUG_CPU +#define DEFAULT_SEND_IPI (1) +#else +#define DEFAULT_SEND_IPI (0) +#endif + +static int no_broadcast=DEFAULT_SEND_IPI; static cpumask_t flat_target_cpus(void) { @@ -45,22 +87,6 @@ static void flat_init_apic_ldr(void) apic_write_around(APIC_LDR, val); } -static void flat_send_IPI_allbutself(int vector) -{ - /* - * if there are no other CPUs in the system then - * we get an APIC send error if we try to broadcast. - * thus we have to avoid sending IPIs in this case. - */ - if (num_online_cpus() > 1) - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL); -} - -static void flat_send_IPI_all(int vector) -{ - __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); -} - static void flat_send_IPI_mask(cpumask_t cpumask, int vector) { unsigned long mask = cpus_addr(cpumask)[0]; @@ -93,6 +119,39 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector) local_irq_restore(flags); } +static inline void __local_flat_send_IPI_allbutself(int vector) +{ + if (no_broadcast) { + cpumask_t mask = cpu_online_map; + int this_cpu = get_cpu(); + + cpu_clear(this_cpu, mask); + flat_send_IPI_mask(mask, vector); + put_cpu(); + } + else + __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL); +} + +static inline void __local_flat_send_IPI_all(int vector) +{ + if (no_broadcast) + flat_send_IPI_mask(cpu_online_map, vector); + else + __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); +} + +static void flat_send_IPI_allbutself(int vector) +{ + if (((num_online_cpus()) - 1) >= 1) + __local_flat_send_IPI_allbutself(vector); +} + +static void flat_send_IPI_all(int vector) +{ + __local_flat_send_IPI_all(vector); +} + static int flat_apic_id_registered(void) { return physid_isset(GET_APIC_ID(apic_read(APIC_ID)), phys_cpu_present_map); @@ -111,6 +170,16 @@ static unsigned int phys_pkg_id(int index_msb) return ((ebx >> 24) & 0xFF) >> index_msb; } +static __init int no_ipi_broadcast(char *str) +{ + get_option(&str, &no_broadcast); + printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" : + "IPI Broadcast"); + return 1; +} + +__setup("no_ipi_broadcast", no_ipi_broadcast); + struct genapic apic_flat = { .name = "flat", .int_delivery_mode = dest_LowestPrio, @@ -125,3 +194,12 @@ struct genapic apic_flat = { .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, }; + +static int __init print_ipi_mode(void) +{ + printk ("Using IPI %s mode\n", no_broadcast ? 
"No-Shortcut" : + "Shortcut"); + return 0; +} + +late_initcall(print_ipi_mode); diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index 9bd2e7a4b81e..8d765aa77a26 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S @@ -248,23 +248,23 @@ ENTRY(_stext) */ .org 0x1000 ENTRY(init_level4_pgt) - .quad 0x0000000000102007 /* -> level3_ident_pgt */ + .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */ .fill 255,8,0 - .quad 0x000000000010a007 + .quad 0x000000000000a007 + __PHYSICAL_START .fill 254,8,0 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ - .quad 0x0000000000103007 /* -> level3_kernel_pgt */ + .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */ .org 0x2000 ENTRY(level3_ident_pgt) - .quad 0x0000000000104007 + .quad 0x0000000000004007 + __PHYSICAL_START .fill 511,8,0 .org 0x3000 ENTRY(level3_kernel_pgt) .fill 510,8,0 /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ - .quad 0x0000000000105007 /* -> level2_kernel_pgt */ + .quad 0x0000000000005007 + __PHYSICAL_START /* -> level2_kernel_pgt */ .fill 1,8,0 .org 0x4000 @@ -337,17 +337,17 @@ ENTRY(empty_bad_pmd_table) .org 0xa000 ENTRY(level3_physmem_pgt) - .quad 0x0000000000105007 /* -> level2_kernel_pgt (so that __va works even before pagetable_init) */ + .quad 0x0000000000005007 + __PHYSICAL_START /* -> level2_kernel_pgt (so that __va works even before pagetable_init) */ .org 0xb000 #ifdef CONFIG_ACPI_SLEEP ENTRY(wakeup_level4_pgt) - .quad 0x0000000000102007 /* -> level3_ident_pgt */ + .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */ .fill 255,8,0 - .quad 0x000000000010a007 + .quad 0x000000000000a007 + __PHYSICAL_START .fill 254,8,0 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ - .quad 0x0000000000103007 /* -> level3_kernel_pgt */ + .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */ #endif .data diff --git a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c index ba139cac57ce..d9b22b633e39 100644 --- a/arch/x86_64/kernel/i387.c +++ b/arch/x86_64/kernel/i387.c @@ -42,7 +42,7 @@ void mxcsr_feature_mask_init(void) * Called at bootup to set up the initial FPU state that is later cloned * into all processes. */ -void __init fpu_init(void) +void __cpuinit fpu_init(void) { unsigned long oldcr0 = read_cr0(); extern void __bad_fxsave_alignment(void); diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c index 19eafa0aa95c..a89169095129 100644 --- a/arch/x86_64/kernel/i8259.c +++ b/arch/x86_64/kernel/i8259.c @@ -414,10 +414,22 @@ static int i8259A_suspend(struct sys_device *dev, pm_message_t state) return 0; } +static int i8259A_shutdown(struct sys_device *dev) +{ + /* Put the i8259A into a quiescent state that + * the kernel initialization code can get it + * out of. 
+ */ + outb(0xff, 0x21); /* mask all of 8259A-1 */ + outb(0xff, 0xA1); /* mask all of 8259A-1 */ + return 0; +} + static struct sysdev_class i8259_sysdev_class = { set_kset_name("i8259"), .suspend = i8259A_suspend, .resume = i8259A_resume, + .shutdown = i8259A_shutdown, }; static struct sys_device device_i8259A = { diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index afd87e64d0a8..157190d986bb 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -330,7 +330,7 @@ static int find_irq_entry(int apic, int pin, int type) /* * Find the pin to which IRQ[irq] (ISA) is connected */ -static int __init find_isa_irq_pin(int irq, int type) +static int find_isa_irq_pin(int irq, int type) { int i; @@ -1132,12 +1132,44 @@ static void __init enable_IO_APIC(void) */ void disable_IO_APIC(void) { + int pin; /* * Clear the IO-APIC before rebooting: */ clear_IO_APIC(); - disconnect_bsp_APIC(); + /* + * If the i82559 is routed through an IOAPIC + * Put that IOAPIC in virtual wire mode + * so legacy interrups can be delivered. + */ + pin = find_isa_irq_pin(0, mp_ExtINT); + if (pin != -1) { + struct IO_APIC_route_entry entry; + unsigned long flags; + + memset(&entry, 0, sizeof(entry)); + entry.mask = 0; /* Enabled */ + entry.trigger = 0; /* Edge */ + entry.irr = 0; + entry.polarity = 0; /* High */ + entry.delivery_status = 0; + entry.dest_mode = 0; /* Physical */ + entry.delivery_mode = 7; /* ExtInt */ + entry.vector = 0; + entry.dest.physical.physical_dest = 0; + + + /* + * Add it to the IO-APIC irq-routing table: + */ + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + + disconnect_bsp_APIC(pin != -1); } /* diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index 62b112e4deb4..cc3fb85f5145 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c @@ -14,6 +14,7 @@ #include <linux/interrupt.h> #include <linux/seq_file.h> #include <linux/module.h> +#include <linux/delay.h> #include <asm/uaccess.h> #include <asm/io_apic.h> @@ -106,3 +107,31 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) return 1; } +#ifdef CONFIG_HOTPLUG_CPU +void fixup_irqs(cpumask_t map) +{ + unsigned int irq; + static int warned; + + for (irq = 0; irq < NR_IRQS; irq++) { + cpumask_t mask; + if (irq == 2) + continue; + + cpus_and(mask, irq_affinity[irq], map); + if (any_online_cpu(mask) == NR_CPUS) { + printk("Breaking affinity for irq %i\n", irq); + mask = map; + } + if (irq_desc[irq].handler->set_affinity) + irq_desc[irq].handler->set_affinity(irq, mask); + else if (irq_desc[irq].action && !(warned++)) + printk("Cannot set affinity for irq %i\n", irq); + } + + /* That doesn't seem sufficient. Give it 1ms. */ + local_irq_enable(); + mdelay(1); + local_irq_disable(); +} +#endif diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c new file mode 100644 index 000000000000..60d1eff41567 --- /dev/null +++ b/arch/x86_64/kernel/machine_kexec.c @@ -0,0 +1,250 @@ +/* + * machine_kexec.c - handle transition of Linux booting another kernel + * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
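A side note on the head.S hunk a little further up: the hard-coded early page-table pointers such as 0x102007 become 0x2007 + __PHYSICAL_START, so the same startup code still works when the kernel is linked to start at a physical address other than the default 1 MB, which is exactly what a kexec crash kernel needs. With the default __PHYSICAL_START of 0x100000 the values are unchanged; a quick arithmetic check (illustration only, plain userspace C):

#include <stdio.h>

int main(void)
{
	unsigned long physical_start = 0x100000;	/* default __PHYSICAL_START */

	/* old hard-coded pointer vs. the new relocatable form */
	printf("0x%lx == 0x%lx\n", 0x102007UL, 0x2007UL + physical_start);
	return 0;
}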
+ */ + +#include <linux/mm.h> +#include <linux/kexec.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/reboot.h> +#include <asm/pda.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> +#include <asm/mmu_context.h> +#include <asm/io.h> +#include <asm/apic.h> +#include <asm/cpufeature.h> +#include <asm/hw_irq.h> + +#define LEVEL0_SIZE (1UL << 12UL) +#define LEVEL1_SIZE (1UL << 21UL) +#define LEVEL2_SIZE (1UL << 30UL) +#define LEVEL3_SIZE (1UL << 39UL) +#define LEVEL4_SIZE (1UL << 48UL) + +#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE) +#define L2_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +#define L3_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) + +static void init_level2_page(u64 *level2p, unsigned long addr) +{ + unsigned long end_addr; + + addr &= PAGE_MASK; + end_addr = addr + LEVEL2_SIZE; + while (addr < end_addr) { + *(level2p++) = addr | L1_ATTR; + addr += LEVEL1_SIZE; + } +} + +static int init_level3_page(struct kimage *image, u64 *level3p, + unsigned long addr, unsigned long last_addr) +{ + unsigned long end_addr; + int result; + + result = 0; + addr &= PAGE_MASK; + end_addr = addr + LEVEL3_SIZE; + while ((addr < last_addr) && (addr < end_addr)) { + struct page *page; + u64 *level2p; + + page = kimage_alloc_control_pages(image, 0); + if (!page) { + result = -ENOMEM; + goto out; + } + level2p = (u64 *)page_address(page); + init_level2_page(level2p, addr); + *(level3p++) = __pa(level2p) | L2_ATTR; + addr += LEVEL2_SIZE; + } + /* clear the unused entries */ + while (addr < end_addr) { + *(level3p++) = 0; + addr += LEVEL2_SIZE; + } +out: + return result; +} + + +static int init_level4_page(struct kimage *image, u64 *level4p, + unsigned long addr, unsigned long last_addr) +{ + unsigned long end_addr; + int result; + + result = 0; + addr &= PAGE_MASK; + end_addr = addr + LEVEL4_SIZE; + while ((addr < last_addr) && (addr < end_addr)) { + struct page *page; + u64 *level3p; + + page = kimage_alloc_control_pages(image, 0); + if (!page) { + result = -ENOMEM; + goto out; + } + level3p = (u64 *)page_address(page); + result = init_level3_page(image, level3p, addr, last_addr); + if (result) { + goto out; + } + *(level4p++) = __pa(level3p) | L3_ATTR; + addr += LEVEL3_SIZE; + } + /* clear the unused entries */ + while (addr < end_addr) { + *(level4p++) = 0; + addr += LEVEL3_SIZE; + } +out: + return result; +} + + +static int init_pgtable(struct kimage *image, unsigned long start_pgtable) +{ + u64 *level4p; + level4p = (u64 *)__va(start_pgtable); + return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT); +} + +static void set_idt(void *newidt, u16 limit) +{ + unsigned char curidt[10]; + + /* x86-64 supports unaliged loads & stores */ + (*(u16 *)(curidt)) = limit; + (*(u64 *)(curidt +2)) = (unsigned long)(newidt); + + __asm__ __volatile__ ( + "lidt %0\n" + : "=m" (curidt) + ); +}; + + +static void set_gdt(void *newgdt, u16 limit) +{ + unsigned char curgdt[10]; + + /* x86-64 supports unaligned loads & stores */ + (*(u16 *)(curgdt)) = limit; + (*(u64 *)(curgdt +2)) = (unsigned long)(newgdt); + + __asm__ __volatile__ ( + "lgdt %0\n" + : "=m" (curgdt) + ); +}; + +static void load_segments(void) +{ + __asm__ __volatile__ ( + "\tmovl $"STR(__KERNEL_DS)",%eax\n" + "\tmovl %eax,%ds\n" + "\tmovl %eax,%es\n" + "\tmovl %eax,%ss\n" + "\tmovl %eax,%fs\n" + "\tmovl %eax,%gs\n" + ); +#undef STR +#undef 
__STR +} + +typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page, + unsigned long control_code_buffer, + unsigned long start_address, + unsigned long pgtable) ATTRIB_NORET; + +const extern unsigned char relocate_new_kernel[]; +const extern unsigned long relocate_new_kernel_size; + +int machine_kexec_prepare(struct kimage *image) +{ + unsigned long start_pgtable, control_code_buffer; + int result; + + /* Calculate the offsets */ + start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; + control_code_buffer = start_pgtable + 4096UL; + + /* Setup the identity mapped 64bit page table */ + result = init_pgtable(image, start_pgtable); + if (result) + return result; + + /* Place the code in the reboot code buffer */ + memcpy(__va(control_code_buffer), relocate_new_kernel, + relocate_new_kernel_size); + + return 0; +} + +void machine_kexec_cleanup(struct kimage *image) +{ + return; +} + +/* + * Do not allocate memory (or fail in any way) in machine_kexec(). + * We are past the point of no return, committed to rebooting now. + */ +NORET_TYPE void machine_kexec(struct kimage *image) +{ + unsigned long page_list; + unsigned long control_code_buffer; + unsigned long start_pgtable; + relocate_new_kernel_t rnk; + + /* Interrupts aren't acceptable while we reboot */ + local_irq_disable(); + + /* Calculate the offsets */ + page_list = image->head; + start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; + control_code_buffer = start_pgtable + 4096UL; + + /* Set the low half of the page table to my identity mapped + * page table for kexec. Leave the high half pointing at the + * kernel pages. Don't bother to flush the global pages + * as that will happen when I fully switch to my identity mapped + * page table anyway. + */ + memcpy(__va(read_cr3()), __va(start_pgtable), PAGE_SIZE/2); + __flush_tlb(); + + + /* The segment registers are funny things, they are + * automatically loaded from a table, in memory wherever you + * set them to a specific selector, but this table is never + * accessed again unless you set the segment to a different selector. + * + * The more common model are caches where the behide + * the scenes work is done, but is also dropped at arbitrary + * times. + * + * I take advantage of this here by force loading the + * segments, before I zap the gdt with an invalid value. + */ + load_segments(); + /* The gdt & idt are now invalid. + * If you want to load them you must set up your own idt & gdt. 
+ */ + set_gdt(phys_to_virt(0),0); + set_idt(phys_to_virt(0),0); + /* now call it */ + rnk = (relocate_new_kernel_t) control_code_buffer; + (*rnk)(page_list, control_code_buffer, image->start, start_pgtable); +} diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 3a89d735a4f6..21e70625a495 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -327,7 +327,7 @@ static void mce_init(void *dummy) } /* Add per CPU specific workarounds here */ -static void __init mce_cpu_quirks(struct cpuinfo_x86 *c) +static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) { /* This should be disabled by the BIOS, but isn't always */ if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) { @@ -337,7 +337,7 @@ static void __init mce_cpu_quirks(struct cpuinfo_x86 *c) } } -static void __init mce_cpu_features(struct cpuinfo_x86 *c) +static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { case X86_VENDOR_INTEL: @@ -352,7 +352,7 @@ static void __init mce_cpu_features(struct cpuinfo_x86 *c) * Called for each booted CPU to set up machine checks. * Must be called with preempt off. */ -void __init mcheck_init(struct cpuinfo_x86 *c) +void __cpuinit mcheck_init(struct cpuinfo_x86 *c) { static cpumask_t mce_cpus __initdata = CPU_MASK_NONE; @@ -411,7 +411,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff memset(mcelog.entry, 0, next * sizeof(struct mce)); mcelog.next = 0; - synchronize_kernel(); + synchronize_sched(); /* Collect entries that were still getting written before the synchronize. */ @@ -542,7 +542,7 @@ ACCESSOR(bank4ctl,bank[4],mce_restart()) ACCESSOR(tolerant,tolerant,) ACCESSOR(check_interval,check_interval,mce_restart()) -static __init int mce_init_device(void) +static __cpuinit int mce_init_device(void) { int err; if (!mce_available(&boot_cpu_data)) diff --git a/arch/x86_64/kernel/mce_intel.c b/arch/x86_64/kernel/mce_intel.c index 4db9a640069f..0be0a7959814 100644 --- a/arch/x86_64/kernel/mce_intel.c +++ b/arch/x86_64/kernel/mce_intel.c @@ -42,7 +42,7 @@ done: irq_exit(); } -static void __init intel_init_thermal(struct cpuinfo_x86 *c) +static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) { u32 l, h; int tm2 = 0; @@ -93,7 +93,7 @@ static void __init intel_init_thermal(struct cpuinfo_x86 *c) return; } -void __init mce_intel_feature_init(struct cpuinfo_x86 *c) +void __cpuinit mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_thermal(c); } diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 31c0f2e6ac91..4e44d6e6b7e5 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -98,7 +98,7 @@ static unsigned int nmi_p4_cccr_val; (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) -static __init inline int nmi_known_cpu(void) +static __cpuinit inline int nmi_known_cpu(void) { switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: @@ -110,7 +110,7 @@ static __init inline int nmi_known_cpu(void) } /* Run after command line and cpu_init init, but before all other checks */ -void __init nmi_watchdog_default(void) +void __cpuinit nmi_watchdog_default(void) { if (nmi_watchdog != NMI_DEFAULT) return; diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index e59d1f9d6163..1d91271796e5 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -8,7 +8,8 @@ * * X86-64 port * Andi Kleen. 
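A recurring change in this patch, here in mce.c, mce_intel.c and nmi.c and earlier in apic.c and i387.c, is relabelling per-CPU bring-up code from __init to __cpuinit: __init text is freed after boot, which is only safe if no CPU can ever be brought up later, whereas __cpuinit code stays resident when CPU hotplug is configured and simply degrades to __init otherwise. A simplified rendering of the convention (the real definitions live in <linux/init.h>; this is a sketch, not the kernel's text):

/* Simplified sketch: code marked __cpuinit must survive past boot when a
 * CPU can be hot-added later; without hotplug it is discarded with the
 * rest of the init sections. */
#ifdef CONFIG_HOTPLUG_CPU
#define __cpuinit			/* keep in the normal .text section */
#define __cpuinitdata			/* keep in normal .data */
#else
#define __cpuinit	__init
#define __cpuinitdata	__initdata
#endif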
- * + * + * CPU hotplug support - ashok.raj@intel.com * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $ */ @@ -18,6 +19,7 @@ #include <stdarg.h> +#include <linux/cpu.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -154,6 +156,29 @@ void cpu_idle_wait(void) } EXPORT_SYMBOL_GPL(cpu_idle_wait); +#ifdef CONFIG_HOTPLUG_CPU +DECLARE_PER_CPU(int, cpu_state); + +#include <asm/nmi.h> +/* We don't actually take CPU down, just spin without interrupts. */ +static inline void play_dead(void) +{ + idle_task_exit(); + wbinvd(); + mb(); + /* Ack it */ + __get_cpu_var(cpu_state) = CPU_DEAD; + + while (1) + safe_halt(); +} +#else +static inline void play_dead(void) +{ + BUG(); +} +#endif /* CONFIG_HOTPLUG_CPU */ + /* * The idle thread. There's no useful work to be * done, so just try to conserve power and have a @@ -174,6 +199,8 @@ void cpu_idle (void) idle = pm_idle; if (!idle) idle = default_idle; + if (cpu_is_offline(smp_processor_id())) + play_dead(); idle(); } @@ -204,7 +231,7 @@ static void mwait_idle(void) } } -void __init select_idle_routine(const struct cpuinfo_x86 *c) +void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) { static int printed; if (cpu_has(c, X86_FEATURE_MWAIT)) { diff --git a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c index be4b36f762cf..57e71dbdfd69 100644 --- a/arch/x86_64/kernel/reboot.c +++ b/arch/x86_64/kernel/reboot.c @@ -66,41 +66,47 @@ static int __init reboot_setup(char *str) __setup("reboot=", reboot_setup); -#ifdef CONFIG_SMP -static void smp_halt(void) +static inline void kb_wait(void) { - int cpuid = safe_smp_processor_id(); - static int first_entry = 1; + int i; - if (reboot_force) - return; + for (i=0; i<0x10000; i++) + if ((inb_p(0x64) & 0x02) == 0) + break; +} - if (first_entry) { - first_entry = 0; - smp_call_function((void *)machine_restart, NULL, 1, 0); - } - - smp_stop_cpu(); +void machine_shutdown(void) +{ + /* Stop the cpus and apics */ +#ifdef CONFIG_SMP + int reboot_cpu_id; - /* AP calling this. Just halt */ - if (cpuid != boot_cpu_id) { - for (;;) - asm("hlt"); + /* The boot cpu is always logical cpu 0 */ + reboot_cpu_id = 0; + + /* Make certain the cpu I'm about to reboot on is online */ + if (!cpu_isset(reboot_cpu_id, cpu_online_map)) { + reboot_cpu_id = smp_processor_id(); } - /* Wait for all other CPUs to have run smp_stop_cpu */ - while (!cpus_empty(cpu_online_map)) - rep_nop(); -} + /* Make certain I only run on the appropriate processor */ + set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id)); + + /* O.K Now that I'm on the appropriate processor, + * stop all of the others. 
+ */ + smp_send_stop(); #endif -static inline void kb_wait(void) -{ - int i; + local_irq_disable(); - for (i=0; i<0x10000; i++) - if ((inb_p(0x64) & 0x02) == 0) - break; +#ifndef CONFIG_SMP + disable_local_APIC(); +#endif + + disable_IO_APIC(); + + local_irq_enable(); } void machine_restart(char * __unused) @@ -109,9 +115,7 @@ void machine_restart(char * __unused) printk("machine restart\n"); -#ifdef CONFIG_SMP - smp_halt(); -#endif + machine_shutdown(); if (!reboot_force) { local_irq_disable(); diff --git a/arch/x86_64/kernel/relocate_kernel.S b/arch/x86_64/kernel/relocate_kernel.S new file mode 100644 index 000000000000..d24fa9b72a2b --- /dev/null +++ b/arch/x86_64/kernel/relocate_kernel.S @@ -0,0 +1,143 @@ +/* + * relocate_kernel.S - put the kernel image in place to boot + * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/linkage.h> + + /* + * Must be relocatable PIC code callable as a C function, that once + * it starts can not use the previous processes stack. + */ + .globl relocate_new_kernel + .code64 +relocate_new_kernel: + /* %rdi page_list + * %rsi reboot_code_buffer + * %rdx start address + * %rcx page_table + * %r8 arg5 + * %r9 arg6 + */ + + /* zero out flags, and disable interrupts */ + pushq $0 + popfq + + /* set a new stack at the bottom of our page... */ + lea 4096(%rsi), %rsp + + /* store the parameters back on the stack */ + pushq %rdx /* store the start address */ + + /* Set cr0 to a known state: + * 31 1 == Paging enabled + * 18 0 == Alignment check disabled + * 16 0 == Write protect disabled + * 3 0 == No task switch + * 2 0 == Don't do FP software emulation. + * 0 1 == Proctected mode enabled + */ + movq %cr0, %rax + andq $~((1<<18)|(1<<16)|(1<<3)|(1<<2)), %rax + orl $((1<<31)|(1<<0)), %eax + movq %rax, %cr0 + + /* Set cr4 to a known state: + * 10 0 == xmm exceptions disabled + * 9 0 == xmm registers instructions disabled + * 8 0 == performance monitoring counter disabled + * 7 0 == page global disabled + * 6 0 == machine check exceptions disabled + * 5 1 == physical address extension enabled + * 4 0 == page size extensions disabled + * 3 0 == Debug extensions disabled + * 2 0 == Time stamp disable (disabled) + * 1 0 == Protected mode virtual interrupts disabled + * 0 0 == VME disabled + */ + + movq $((1<<5)), %rax + movq %rax, %cr4 + + jmp 1f +1: + + /* Switch to the identity mapped page tables, + * and flush the TLB. + */ + movq %rcx, %cr3 + + /* Do the copies */ + movq %rdi, %rcx /* Put the page_list in %rcx */ + xorq %rdi, %rdi + xorq %rsi, %rsi + jmp 1f + +0: /* top, read another word for the indirection page */ + + movq (%rbx), %rcx + addq $8, %rbx +1: + testq $0x1, %rcx /* is it a destination page? */ + jz 2f + movq %rcx, %rdi + andq $0xfffffffffffff000, %rdi + jmp 0b +2: + testq $0x2, %rcx /* is it an indirection page? */ + jz 2f + movq %rcx, %rbx + andq $0xfffffffffffff000, %rbx + jmp 0b +2: + testq $0x4, %rcx /* is it the done indicator? */ + jz 2f + jmp 3f +2: + testq $0x8, %rcx /* is it the source indicator? */ + jz 0b /* Ignore it otherwise */ + movq %rcx, %rsi /* For ever source page do a copy */ + andq $0xfffffffffffff000, %rsi + + movq $512, %rcx + rep ; movsq + jmp 0b +3: + + /* To be certain of avoiding problems with self-modifying code + * I need to execute a serializing instruction here. + * So I flush the TLB by reloading %cr3 here, it's handy, + * and not processor dependent. 
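The copy loop just above walks the kexec indirection list: every 8-byte entry is a page-aligned address whose low bits say what it is, 0x1 for a new destination page, 0x2 for a pointer to the next indirection page, 0x4 for the end marker, 0x8 for a source page to copy. Essentially the same walk in plain C, for readability only (the constant names and helper are made up; the flag values are the ones tested by the assembly):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define IND_DESTINATION	0x1	/* entry sets the current destination page */
#define IND_INDIRECTION	0x2	/* entry points at the next indirection page */
#define IND_DONE	0x4	/* end of the list */
#define IND_SOURCE	0x8	/* entry names a source page to copy */

#define KPAGE_SIZE	4096UL
#define KPAGE_MASK	(~(KPAGE_SIZE - 1))

/* 'entry' starts out pointing at the list head (image->head in the patch). */
static void copy_kexec_pages(uint64_t *entry)
{
	unsigned char *dest = NULL;

	for (;;) {
		uint64_t e = *entry++;

		if (e & IND_DESTINATION)
			dest = (unsigned char *)(e & KPAGE_MASK);
		else if (e & IND_INDIRECTION)
			entry = (uint64_t *)(e & KPAGE_MASK);
		else if (e & IND_DONE)
			return;
		else if (e & IND_SOURCE) {
			memcpy(dest, (void *)(e & KPAGE_MASK), KPAGE_SIZE);
			dest += KPAGE_SIZE;	/* rep movsq advances %rdi the same way */
		}
		/* entries with no flag set are skipped, as in the assembly */
	}
}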
+ */ + movq %cr3, %rax + movq %rax, %cr3 + + /* set all of the registers to known values */ + /* leave %rsp alone */ + + xorq %rax, %rax + xorq %rbx, %rbx + xorq %rcx, %rcx + xorq %rdx, %rdx + xorq %rsi, %rsi + xorq %rdi, %rdi + xorq %rbp, %rbp + xorq %r8, %r8 + xorq %r9, %r9 + xorq %r10, %r9 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + xorq %r14, %r14 + xorq %r15, %r15 + + ret +relocate_new_kernel_end: + + .globl relocate_new_kernel_size +relocate_new_kernel_size: + .quad relocate_new_kernel_end - relocate_new_kernel diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 000015dd5a8b..b02d921da4f7 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -41,6 +41,7 @@ #include <linux/kallsyms.h> #include <linux/edd.h> #include <linux/mmzone.h> +#include <linux/kexec.h> #include <asm/mtrr.h> #include <asm/uaccess.h> @@ -367,6 +368,27 @@ static __init void parse_cmdline_early (char ** cmdline_p) if (!memcmp(from, "noexec=", 7)) nonx_setup(from + 7); +#ifdef CONFIG_KEXEC + /* crashkernel=size@addr specifies the location to reserve for + * a crash kernel. By reserving this memory we guarantee + * that linux never set's it up as a DMA target. + * Useful for holding code to do something appropriate + * after a kernel panic. + */ + else if (!memcmp(from, "crashkernel=", 12)) { + unsigned long size, base; + size = memparse(from+12, &from); + if (*from == '@') { + base = memparse(from+1, &from); + /* FIXME: Do I want a sanity check + * to validate the memory range? + */ + crashk_res.start = base; + crashk_res.end = base + size - 1; + } + } +#endif + next_char: c = *(from++); if (!c) @@ -625,6 +647,13 @@ void __init setup_arch(char **cmdline_p) #endif sparse_init(); + +#ifdef CONFIG_KEXEC + if (crashk_res.start != crashk_res.end) { + reserve_bootmem(crashk_res.start, + crashk_res.end - crashk_res.start + 1); + } +#endif paging_init(); check_ioapic(); @@ -676,7 +705,7 @@ void __init setup_arch(char **cmdline_p) #endif } -static int __init get_model_name(struct cpuinfo_x86 *c) +static int __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; @@ -692,7 +721,7 @@ static int __init get_model_name(struct cpuinfo_x86 *c) } -static void __init display_cacheinfo(struct cpuinfo_x86 *c) +static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, eax, ebx, ecx, edx; @@ -803,7 +832,7 @@ static int __init init_amd(struct cpuinfo_x86 *c) return r; } -static void __init detect_ht(struct cpuinfo_x86 *c) +static void __cpuinit detect_ht(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP u32 eax, ebx, ecx, edx; @@ -864,7 +893,7 @@ static void __init detect_ht(struct cpuinfo_x86 *c) /* * find out the number of processor cores on the die */ -static int __init intel_num_cpu_cores(struct cpuinfo_x86 *c) +static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) { unsigned int eax; @@ -882,7 +911,7 @@ static int __init intel_num_cpu_cores(struct cpuinfo_x86 *c) return 1; } -static void __init init_intel(struct cpuinfo_x86 *c) +static void __cpuinit init_intel(struct cpuinfo_x86 *c) { /* Cache sizes */ unsigned n; @@ -902,7 +931,7 @@ static void __init init_intel(struct cpuinfo_x86 *c) c->x86_num_cores = intel_num_cpu_cores(c); } -void __init get_cpu_vendor(struct cpuinfo_x86 *c) +void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; @@ -923,7 +952,7 @@ struct cpu_model_info { /* Do some early cpuid on the boot CPU to get some parameter that are needed before check_bugs. 
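The parse_cmdline_early() hunk above introduces crashkernel=size@offset; for example "crashkernel=64M@16M" reserves 64 MB starting at physical 16 MB for a crash (kexec-on-panic) kernel, and the range is then withheld from bootmem in setup_arch(). Size and base go through memparse(), which accepts the usual K/M/G suffixes. A small userspace approximation of the parsing (parse_size() is a made-up stand-in, not the kernel helper):

#include <stdio.h>
#include <stdlib.h>

/* rough stand-in for memparse(): a number with an optional K/M/G suffix,
 * returned in bytes; *retp is advanced past whatever was consumed */
static unsigned long long parse_size(const char *s, char **retp)
{
	unsigned long long v = strtoull(s, retp, 0);

	switch (**retp) {
	case 'G': case 'g': v <<= 10;	/* fall through */
	case 'M': case 'm': v <<= 10;	/* fall through */
	case 'K': case 'k': v <<= 10; (*retp)++; break;
	}
	return v;
}

int main(void)
{
	const char *arg = "64M@16M";	/* as in crashkernel=64M@16M */
	char *p;
	unsigned long long size, base = 0;

	size = parse_size(arg, &p);
	if (*p == '@')
		base = parse_size(p + 1, &p);

	/* the kernel then sets crashk_res.start/end from base and size */
	printf("reserve %llu bytes at 0x%llx\n", size, base);
	return 0;
}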
Everything advanced is in identify_cpu below. */ -void __init early_identify_cpu(struct cpuinfo_x86 *c) +void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) { u32 tfms; @@ -977,7 +1006,7 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c) /* * This does the hard work of actually picking apart the CPU stuff... */ -void __init identify_cpu(struct cpuinfo_x86 *c) +void __cpuinit identify_cpu(struct cpuinfo_x86 *c) { int i; u32 xlvl; @@ -1054,7 +1083,7 @@ void __init identify_cpu(struct cpuinfo_x86 *c) } -void __init print_cpu_info(struct cpuinfo_x86 *c) +void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { if (c->x86_model_id[0]) printk("%s", c->x86_model_id); diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 678b7ac33b8b..f2b9c6bc999a 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -29,7 +29,7 @@ char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,}; -cpumask_t cpu_initialized __initdata = CPU_MASK_NONE; +cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; struct x8664_pda cpu_pda[NR_CPUS] __cacheline_aligned; @@ -171,7 +171,7 @@ void syscall_init(void) wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); } -void __init check_efer(void) +void __cpuinit check_efer(void) { unsigned long efer; @@ -188,7 +188,7 @@ void __init check_efer(void) * 'CPU state barrier', nothing should get across. * A lot of state is already set up in PDA init. */ -void __init cpu_init (void) +void __cpuinit cpu_init (void) { #ifdef CONFIG_SMP int cpu = stack_smp_processor_id(); diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 1e379ed17b1d..ccae392886af 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c @@ -283,6 +283,16 @@ struct call_data_struct { static struct call_data_struct * call_data; +void lock_ipi_call_lock(void) +{ + spin_lock_irq(&call_lock); +} + +void unlock_ipi_call_lock(void) +{ + spin_unlock_irq(&call_lock); +} + /* * this function sends a 'generic call function' IPI to all other CPUs * in the system. diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index f1ec0f345941..b969ee128728 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -34,6 +34,7 @@ * Andi Kleen : Converted to new state machine. * Various cleanups. * Probably mostly hotplug CPU ready now. + * Ashok Raj : CPU hotplug support */ @@ -58,11 +59,6 @@ #include <asm/proto.h> #include <asm/nmi.h> -/* Change for real CPU hotplug. Note other files need to be fixed - first too. */ -#define __cpuinit __init -#define __cpuinitdata __initdata - /* Number of siblings per CPU package */ int smp_num_siblings = 1; /* Package ID of each logical CPU */ @@ -103,6 +99,37 @@ EXPORT_SYMBOL(cpu_core_map); extern unsigned char trampoline_data[]; extern unsigned char trampoline_end[]; +/* State of each CPU */ +DEFINE_PER_CPU(int, cpu_state) = { 0 }; + +/* + * Store all idle threads, this can be reused instead of creating + * a new thread. Also avoids complicated thread destroy functionality + * for idle threads. + */ +struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; + +#define get_idle_for_cpu(x) (idle_thread_array[(x)]) +#define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p)) + +/* + * cpu_possible_map should be static, it cannot change as cpu's + * are onlined, or offlined. The reason is per-cpu data-structures + * are allocated by some modules at init time, and dont expect to + * do this dynamically on cpu arrival/departure. 
+ * cpu_present_map on the other hand can change dynamically. + * In case when cpu_hotplug is not compiled, then we resort to current + * behaviour, which is cpu_possible == cpu_present. + * If cpu-hotplug is supported, then we need to preallocate for all + * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range. + * - Ashok Raj + */ +#ifdef CONFIG_HOTPLUG_CPU +#define fixup_cpu_possible_map(x) cpu_set((x), cpu_possible_map) +#else +#define fixup_cpu_possible_map(x) +#endif + /* * Currently trivial. Write the real->protected mode * bootstrap into the page concerned. The caller @@ -418,6 +445,33 @@ void __cpuinit smp_callin(void) cpu_set(cpuid, cpu_callin_map); } +static inline void set_cpu_sibling_map(int cpu) +{ + int i; + + if (smp_num_siblings > 1) { + for_each_cpu(i) { + if (cpu_core_id[cpu] == cpu_core_id[i]) { + cpu_set(i, cpu_sibling_map[cpu]); + cpu_set(cpu, cpu_sibling_map[i]); + } + } + } else { + cpu_set(cpu, cpu_sibling_map[cpu]); + } + + if (current_cpu_data.x86_num_cores > 1) { + for_each_cpu(i) { + if (phys_proc_id[cpu] == phys_proc_id[i]) { + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); + } + } + } else { + cpu_core_map[cpu] = cpu_sibling_map[cpu]; + } +} + /* * Setup code on secondary processor (after comming out of the trampoline) */ @@ -448,9 +502,28 @@ void __cpuinit start_secondary(void) enable_APIC_timer(); /* + * The sibling maps must be set before turing the online map on for + * this cpu + */ + set_cpu_sibling_map(smp_processor_id()); + + /* + * We need to hold call_lock, so there is no inconsistency + * between the time smp_call_function() determines number of + * IPI receipients, and the time when the determination is made + * for which cpus receive the IPI in genapic_flat.c. Holding this + * lock helps us to not include this cpu in a currently in progress + * smp_call_function(). + */ + lock_ipi_call_lock(); + + /* * Allow the master to continue. */ cpu_set(smp_processor_id(), cpu_online_map); + per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; + unlock_ipi_call_lock(); + mb(); /* Wait for TSC sync to not schedule things before. @@ -628,33 +701,77 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta return (send_status | accept_status); } +struct create_idle { + struct task_struct *idle; + struct completion done; + int cpu; +}; + +void do_fork_idle(void *_c_idle) +{ + struct create_idle *c_idle = _c_idle; + + c_idle->idle = fork_idle(c_idle->cpu); + complete(&c_idle->done); +} + /* * Boot one CPU. */ static int __cpuinit do_boot_cpu(int cpu, int apicid) { - struct task_struct *idle; unsigned long boot_error; int timeout; unsigned long start_rip; + struct create_idle c_idle = { + .cpu = cpu, + .done = COMPLETION_INITIALIZER(c_idle.done), + }; + DECLARE_WORK(work, do_fork_idle, &c_idle); + + c_idle.idle = get_idle_for_cpu(cpu); + + if (c_idle.idle) { + c_idle.idle->thread.rsp = (unsigned long) (((struct pt_regs *) + (THREAD_SIZE + (unsigned long) c_idle.idle->thread_info)) - 1); + init_idle(c_idle.idle, cpu); + goto do_rest; + } + /* - * We can't use kernel_thread since we must avoid to - * reschedule the child. + * During cold boot process, keventd thread is not spun up yet. + * When we do cpu hot-add, we create idle threads on the fly, we should + * not acquire any attributes from the calling context. Hence the clean + * way to create kernel_threads() is to do that from keventd(). 
+ * We do the current_is_keventd() due to the fact that ACPI notifier + * was also queuing to keventd() and when the caller is already running + * in context of keventd(), we would end up with locking up the keventd + * thread. */ - idle = fork_idle(cpu); - if (IS_ERR(idle)) { + if (!keventd_up() || current_is_keventd()) + work.func(work.data); + else { + schedule_work(&work); + wait_for_completion(&c_idle.done); + } + + if (IS_ERR(c_idle.idle)) { printk("failed fork for CPU %d\n", cpu); - return PTR_ERR(idle); + return PTR_ERR(c_idle.idle); } - cpu_pda[cpu].pcurrent = idle; + set_idle_for_cpu(cpu, c_idle.idle); + +do_rest: + + cpu_pda[cpu].pcurrent = c_idle.idle; start_rip = setup_trampoline(); - init_rsp = idle->thread.rsp; + init_rsp = c_idle.idle->thread.rsp; per_cpu(init_tss,cpu).rsp0 = init_rsp; initial_code = start_secondary; - clear_ti_thread_flag(idle->thread_info, TIF_FORK); + clear_ti_thread_flag(c_idle.idle->thread_info, TIF_FORK); printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid, start_rip, init_rsp); @@ -746,51 +863,6 @@ cycles_t cacheflush_time; unsigned long cache_decay_ticks; /* - * Construct cpu_sibling_map[], so that we can tell the sibling CPU - * on SMT systems efficiently. - */ -static __cpuinit void detect_siblings(void) -{ - int cpu; - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); - } - - for_each_online_cpu (cpu) { - struct cpuinfo_x86 *c = cpu_data + cpu; - int siblings = 0; - int i; - if (smp_num_siblings > 1) { - for_each_online_cpu (i) { - if (cpu_core_id[cpu] == cpu_core_id[i]) { - siblings++; - cpu_set(i, cpu_sibling_map[cpu]); - } - } - } else { - siblings++; - cpu_set(cpu, cpu_sibling_map[cpu]); - } - - if (siblings != smp_num_siblings) { - printk(KERN_WARNING - "WARNING: %d siblings found for CPU%d, should be %d\n", - siblings, cpu, smp_num_siblings); - smp_num_siblings = siblings; - } - if (c->x86_num_cores > 1) { - for_each_online_cpu(i) { - if (phys_proc_id[cpu] == phys_proc_id[i]) - cpu_set(i, cpu_core_map[cpu]); - } - } else - cpu_core_map[cpu] = cpu_sibling_map[cpu]; - } -} - -/* * Cleanup possible dangling ends... */ static __cpuinit void smp_cleanup_boot(void) @@ -823,7 +895,7 @@ static __cpuinit void smp_cleanup_boot(void) * * RED-PEN audit/test this more. I bet there is more state messed up here. */ -static __cpuinit void disable_smp(void) +static __init void disable_smp(void) { cpu_present_map = cpumask_of_cpu(0); cpu_possible_map = cpumask_of_cpu(0); @@ -838,7 +910,7 @@ static __cpuinit void disable_smp(void) /* * Handle user cpus=... parameter. */ -static __cpuinit void enforce_max_cpus(unsigned max_cpus) +static __init void enforce_max_cpus(unsigned max_cpus) { int i, k; k = 0; @@ -855,7 +927,7 @@ static __cpuinit void enforce_max_cpus(unsigned max_cpus) /* * Various sanity checks. */ -static int __cpuinit smp_sanity_check(unsigned max_cpus) +static int __init smp_sanity_check(unsigned max_cpus) { if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { printk("weird, boot CPU (#%d) not listed by the BIOS.\n", @@ -913,7 +985,7 @@ static int __cpuinit smp_sanity_check(unsigned max_cpus) * Prepare for SMP bootup. The MP table or ACPI has been read * earlier. Just do some sanity checking here and enable APIC mode. 
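do_boot_cpu() above hands fork_idle() off to keventd through a work item and a completion, so the idle thread of a hot-added CPU is created from a neutral context instead of inheriting attributes from whichever task triggered the hot-add, and it calls the function directly when keventd is not up yet or when the caller already is keventd. The same defer-and-wait pattern reduced to a minimal sketch, using the 2.6.12-era workqueue interface that the patch itself uses (names are illustrative):

#include <linux/workqueue.h>
#include <linux/completion.h>

struct deferred_ctx {
	struct completion done;
	int result;
};

/* runs in keventd (the events workqueue) context */
static void deferred_func(void *data)
{
	struct deferred_ctx *ctx = data;

	ctx->result = 42;		/* the real work goes here */
	complete(&ctx->done);
}

static int run_via_keventd(void)
{
	struct deferred_ctx ctx;
	DECLARE_WORK(work, deferred_func, &ctx);

	init_completion(&ctx.done);
	ctx.result = 0;

	if (!keventd_up() || current_is_keventd())
		work.func(work.data);	/* too early in boot, or already keventd */
	else {
		schedule_work(&work);
		wait_for_completion(&ctx.done);
	}
	return ctx.result;
}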
*/ -void __cpuinit smp_prepare_cpus(unsigned int max_cpus) +void __init smp_prepare_cpus(unsigned int max_cpus) { int i; @@ -930,10 +1002,9 @@ void __cpuinit smp_prepare_cpus(unsigned int max_cpus) int apicid = cpu_present_to_apicid(i); if (physid_isset(apicid, phys_cpu_present_map)) { cpu_set(i, cpu_present_map); - /* possible map would be different if we supported real - CPU hotplug. */ cpu_set(i, cpu_possible_map); } + fixup_cpu_possible_map(i); } if (smp_sanity_check(max_cpus) < 0) { @@ -978,13 +1049,13 @@ void __init smp_prepare_boot_cpu(void) int me = smp_processor_id(); cpu_set(me, cpu_online_map); cpu_set(me, cpu_callout_map); + cpu_set(0, cpu_sibling_map[0]); + cpu_set(0, cpu_core_map[0]); + per_cpu(cpu_state, me) = CPU_ONLINE; } /* * Entry point to boot a CPU. - * - * This is all __cpuinit, not __devinit for now because we don't support - * CPU hotplug (yet). */ int __cpuinit __cpu_up(unsigned int cpu) { @@ -1001,6 +1072,15 @@ int __cpuinit __cpu_up(unsigned int cpu) return -EINVAL; } + /* + * Already booted CPU? + */ + if (cpu_isset(cpu, cpu_callin_map)) { + Dprintk("do_boot_cpu %d Already started\n", cpu); + return -ENOSYS; + } + + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; /* Boot it! */ err = do_boot_cpu(cpu, apicid); if (err < 0) { @@ -1013,23 +1093,118 @@ int __cpuinit __cpu_up(unsigned int cpu) while (!cpu_isset(cpu, cpu_online_map)) cpu_relax(); - return 0; + err = 0; + + return err; } /* * Finish the SMP boot. */ -void __cpuinit smp_cpus_done(unsigned int max_cpus) +void __init smp_cpus_done(unsigned int max_cpus) { +#ifndef CONFIG_HOTPLUG_CPU zap_low_mappings(); +#endif smp_cleanup_boot(); #ifdef CONFIG_X86_IO_APIC setup_ioapic_dest(); #endif - detect_siblings(); time_init_gtod(); check_nmi_watchdog(); } + +#ifdef CONFIG_HOTPLUG_CPU + +static void remove_siblinginfo(int cpu) +{ + int sibling; + + for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) + cpu_clear(cpu, cpu_sibling_map[sibling]); + for_each_cpu_mask(sibling, cpu_core_map[cpu]) + cpu_clear(cpu, cpu_core_map[sibling]); + cpus_clear(cpu_sibling_map[cpu]); + cpus_clear(cpu_core_map[cpu]); + phys_proc_id[cpu] = BAD_APICID; + cpu_core_id[cpu] = BAD_APICID; +} + +void remove_cpu_from_maps(void) +{ + int cpu = smp_processor_id(); + + cpu_clear(cpu, cpu_callout_map); + cpu_clear(cpu, cpu_callin_map); + clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ +} + +int __cpu_disable(void) +{ + int cpu = smp_processor_id(); + + /* + * Perhaps use cpufreq to drop frequency, but that could go + * into generic code. + * + * We won't take down the boot processor on i386 due to some + * interrupts only being able to be serviced by the BSP. + * Especially so if we're not using an IOAPIC -zwane + */ + if (cpu == 0) + return -EBUSY; + + disable_APIC_timer(); + + /* + * HACK: + * Allow any queued timer interrupts to get serviced + * This is only a temporary solution until we cleanup + * fixup_irqs as we do for IA64. + */ + local_irq_enable(); + mdelay(1); + + local_irq_disable(); + remove_siblinginfo(cpu); + + /* It's now safe to remove this processor from the online map */ + cpu_clear(cpu, cpu_online_map); + remove_cpu_from_maps(); + fixup_irqs(cpu_online_map); + return 0; +} + +void __cpu_die(unsigned int cpu) +{ + /* We don't do anything here: idle task is faking death itself. 
*/ + unsigned int i; + + for (i = 0; i < 10; i++) { + /* They ack this in play_dead by setting CPU_DEAD */ + if (per_cpu(cpu_state, cpu) == CPU_DEAD) { + printk ("CPU %d is now offline\n", cpu); + return; + } + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ/10); + } + printk(KERN_ERR "CPU %u didn't die...\n", cpu); +} + +#else /* ... !CONFIG_HOTPLUG_CPU */ + +int __cpu_disable(void) +{ + return -ENOSYS; +} + +void __cpu_die(unsigned int cpu) +{ + /* We said "no" in __cpu_disable */ + BUG(); +} +#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c index ebaa1e37d657..6c0f402e3a88 100644 --- a/arch/x86_64/kernel/suspend.c +++ b/arch/x86_64/kernel/suspend.c @@ -44,7 +44,6 @@ void __save_processor_state(struct saved_context *ctxt) */ asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit)); asm volatile ("sidt %0" : "=m" (ctxt->idt_limit)); - asm volatile ("sldt %0" : "=m" (ctxt->ldt)); asm volatile ("str %0" : "=m" (ctxt->tr)); /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */ @@ -69,6 +68,7 @@ void __save_processor_state(struct saved_context *ctxt) asm volatile ("movq %%cr2, %0" : "=r" (ctxt->cr2)); asm volatile ("movq %%cr3, %0" : "=r" (ctxt->cr3)); asm volatile ("movq %%cr4, %0" : "=r" (ctxt->cr4)); + asm volatile ("movq %%cr8, %0" : "=r" (ctxt->cr8)); } void save_processor_state(void) @@ -90,12 +90,20 @@ void __restore_processor_state(struct saved_context *ctxt) /* * control registers */ + asm volatile ("movq %0, %%cr8" :: "r" (ctxt->cr8)); asm volatile ("movq %0, %%cr4" :: "r" (ctxt->cr4)); asm volatile ("movq %0, %%cr3" :: "r" (ctxt->cr3)); asm volatile ("movq %0, %%cr2" :: "r" (ctxt->cr2)); asm volatile ("movq %0, %%cr0" :: "r" (ctxt->cr0)); /* + * now restore the descriptor tables to their proper values + * ltr is done i fix_processor_context(). + */ + asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit)); + asm volatile ("lidt %0" :: "m" (ctxt->idt_limit)); + + /* * segment registers */ asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds)); @@ -108,14 +116,6 @@ void __restore_processor_state(struct saved_context *ctxt) wrmsrl(MSR_GS_BASE, ctxt->gs_base); wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); - /* - * now restore the descriptor tables to their proper values - * ltr is done i fix_processor_context(). - */ - asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit)); - asm volatile ("lidt %0" :: "m" (ctxt->idt_limit)); - asm volatile ("lldt %0" :: "m" (ctxt->ldt)); - fix_processor_context(); do_fpu_end(); diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 121646fc43f6..102736630002 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -586,11 +586,17 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) asmlinkage void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; + int cpu; + + cpu = smp_processor_id(); /* Only the BSP gets external NMIs from the system. 
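The __cpu_disable()/__cpu_die() pair above, together with play_dead() in the process.c hunk, is what runs when a processor is taken offline: the departing CPU migrates its interrupts away and removes itself from the cpu maps, then marks its per-cpu cpu_state as CPU_DEAD and halts, while the CPU that initiated the offline polls for that mark for roughly a second. From userspace the sequence is normally driven through the CPU's sysfs online attribute; a trivial sketch (the CPU number and error handling are illustrative):

#include <stdio.h>

int main(void)
{
	/* writing "0" asks the generic cpu_down() path to offline CPU 1,
	 * which lands in __cpu_disable()/__cpu_die() above; writing "1"
	 * brings it back through __cpu_up() */
	FILE *f = fopen("/sys/devices/system/cpu/cpu1/online", "w");

	if (!f) {
		perror("cpu1/online");
		return 1;
	}
	fputs("0\n", f);
	return fclose(f) ? 1 : 0;
}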
*/ - if (!smp_processor_id()) + if (!cpu) reason = get_nmi_reason(); + if (!cpu_online(cpu)) + return; + if (!(reason & 0xc0)) { if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT) == NOTIFY_STOP) diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 59ebd5beda87..73389f51c4e5 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -2,7 +2,10 @@ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>; */ +#define LOAD_OFFSET __START_KERNEL_map + #include <asm-generic/vmlinux.lds.h> +#include <asm/page.h> #include <linux/config.h> OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") @@ -11,28 +14,30 @@ ENTRY(phys_startup_64) jiffies_64 = jiffies; SECTIONS { - . = 0xffffffff80100000; + . = __START_KERNEL; phys_startup_64 = startup_64 - LOAD_OFFSET; _text = .; /* Text and read-only data */ - .text : { + .text : AT(ADDR(.text) - LOAD_OFFSET) { *(.text) SCHED_TEXT LOCK_TEXT *(.fixup) *(.gnu.warning) } = 0x9090 - .text.lock : { *(.text.lock) } /* out-of-line lock text */ + /* out-of-line lock text */ + .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) { *(.text.lock) } _etext = .; /* End of text section */ . = ALIGN(16); /* Exception table */ __start___ex_table = .; - __ex_table : { *(__ex_table) } + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } __stop___ex_table = .; RODATA - .data : { /* Data */ + /* Data */ + .data : AT(ADDR(.data) - LOAD_OFFSET) { *(.data) CONSTRUCTORS } @@ -40,62 +45,95 @@ SECTIONS _edata = .; /* End of data section */ __bss_start = .; /* BSS */ - .bss : { + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { *(.bss.page_aligned) *(.bss) } __bss_end = .; + . = ALIGN(PAGE_SIZE); . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } + .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { + *(.data.cacheline_aligned) + } + +#define VSYSCALL_ADDR (-10*1024*1024) +#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.cacheline_aligned) + SIZEOF(.data.cacheline_aligned) + 4095) & ~(4095)) +#define VSYSCALL_VIRT_ADDR ((ADDR(.data.cacheline_aligned) + SIZEOF(.data.cacheline_aligned) + 4095) & ~(4095)) + +#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) +#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) + +#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR) +#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) -#define AFTER(x) BINALIGN(LOADADDR(x) + SIZEOF(x), 16) -#define BINALIGN(x,y) (((x) + (y) - 1) & ~((y) - 1)) -#define CACHE_ALIGN(x) BINALIGN(x, CONFIG_X86_L1_CACHE_BYTES) + . = VSYSCALL_ADDR; + .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } + __vsyscall_0 = VSYSCALL_VIRT_ADDR; - .vsyscall_0 -10*1024*1024: AT ((LOADADDR(.data.cacheline_aligned) + SIZEOF(.data.cacheline_aligned) + 4095) & ~(4095)) { *(.vsyscall_0) } - __vsyscall_0 = LOADADDR(.vsyscall_0); . 
= ALIGN(CONFIG_X86_L1_CACHE_BYTES); - .xtime_lock : AT CACHE_ALIGN(AFTER(.vsyscall_0)) { *(.xtime_lock) } - xtime_lock = LOADADDR(.xtime_lock); - .vxtime : AT AFTER(.xtime_lock) { *(.vxtime) } - vxtime = LOADADDR(.vxtime); - .wall_jiffies : AT AFTER(.vxtime) { *(.wall_jiffies) } - wall_jiffies = LOADADDR(.wall_jiffies); - .sys_tz : AT AFTER(.wall_jiffies) { *(.sys_tz) } - sys_tz = LOADADDR(.sys_tz); - .sysctl_vsyscall : AT AFTER(.sys_tz) { *(.sysctl_vsyscall) } - sysctl_vsyscall = LOADADDR(.sysctl_vsyscall); - .xtime : AT AFTER(.sysctl_vsyscall) { *(.xtime) } - xtime = LOADADDR(.xtime); + .xtime_lock : AT(VLOAD(.xtime_lock)) { *(.xtime_lock) } + xtime_lock = VVIRT(.xtime_lock); + + .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) } + vxtime = VVIRT(.vxtime); + + .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) } + wall_jiffies = VVIRT(.wall_jiffies); + + .sys_tz : AT(VLOAD(.sys_tz)) { *(.sys_tz) } + sys_tz = VVIRT(.sys_tz); + + .sysctl_vsyscall : AT(VLOAD(.sysctl_vsyscall)) { *(.sysctl_vsyscall) } + sysctl_vsyscall = VVIRT(.sysctl_vsyscall); + + .xtime : AT(VLOAD(.xtime)) { *(.xtime) } + xtime = VVIRT(.xtime); + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - .jiffies : AT CACHE_ALIGN(AFTER(.xtime)) { *(.jiffies) } - jiffies = LOADADDR(.jiffies); - .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT (LOADADDR(.vsyscall_0) + 1024) { *(.vsyscall_1) } - . = LOADADDR(.vsyscall_0) + 4096; + .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) } + jiffies = VVIRT(.jiffies); + + .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { *(.vsyscall_1) } + .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { *(.vsyscall_2) } + .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { *(.vsyscall_3) } + + . = VSYSCALL_VIRT_ADDR + 4096; + +#undef VSYSCALL_ADDR +#undef VSYSCALL_PHYS_ADDR +#undef VSYSCALL_VIRT_ADDR +#undef VLOAD_OFFSET +#undef VLOAD +#undef VVIRT_OFFSET +#undef VVIRT . = ALIGN(8192); /* init_task */ - .data.init_task : { *(.data.init_task) } + .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { + *(.data.init_task) + } . = ALIGN(4096); - .data.page_aligned : { *(.data.page_aligned) } + .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { + *(.data.page_aligned) + } . = ALIGN(4096); /* Init code and data */ __init_begin = .; - .init.text : { + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; *(.init.text) _einittext = .; } __initdata_begin = .; - .init.data : { *(.init.data) } + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) } __initdata_end = .; . = ALIGN(16); __setup_start = .; - .init.setup : { *(.init.setup) } + .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) } __setup_end = .; __initcall_start = .; - .initcall.init : { + .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { *(.initcall1.init) *(.initcall2.init) *(.initcall3.init) @@ -106,32 +144,38 @@ SECTIONS } __initcall_end = .; __con_initcall_start = .; - .con_initcall.init : { *(.con_initcall.init) } + .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { + *(.con_initcall.init) + } __con_initcall_end = .; SECURITY_INIT . 
= ALIGN(8); __alt_instructions = .; - .altinstructions : { *(.altinstructions) } + .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + *(.altinstructions) + } __alt_instructions_end = .; - .altinstr_replacement : { *(.altinstr_replacement) } + .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { + *(.altinstr_replacement) + } /* .exit.text is discard at runtime, not link time, to deal with references from .altinstructions and .eh_frame */ - .exit.text : { *(.exit.text) } - .exit.data : { *(.exit.data) } + .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } + .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } . = ALIGN(4096); __initramfs_start = .; - .init.ramfs : { *(.init.ramfs) } + .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } __initramfs_end = .; . = ALIGN(32); __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } + .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } __per_cpu_end = .; . = ALIGN(4096); __init_end = .; . = ALIGN(4096); __nosave_begin = .; - .data_nosave : { *(.data.nosave) } + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } . = ALIGN(4096); __nosave_end = .; diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 84cde796ecb1..ac61c186eb02 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -251,7 +251,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); } -__init void numa_add_cpu(int cpu) +__cpuinit void numa_add_cpu(int cpu) { /* BP is initialized elsewhere */ if (cpu) diff --git a/arch/xtensa/platform-iss/console.c b/arch/xtensa/platform-iss/console.c index 9e2b53f6a907..4fbddf92a921 100644 --- a/arch/xtensa/platform-iss/console.c +++ b/arch/xtensa/platform-iss/console.c @@ -198,9 +198,6 @@ static int rs_read_proc(char *page, char **start, off_t off, int count, } -int register_serial(struct serial_struct*); -void unregister_serial(int); - static struct tty_operations serial_ops = { .open = rs_open, .close = rs_close, |
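One detail of the vmlinux.lds.S rework worth spelling out: VSYSCALL_ADDR is -10*1024*1024, which as a 64-bit link address is 0xffffffffff600000, the fixed user-visible vsyscall page, while the AT(VSYSCALL_PHYS_ADDR) and VLOAD() clauses keep the section's load address page-aligned immediately after .data.cacheline_aligned. A two-line check of the address arithmetic (plain C, assuming 64-bit long as on x86-64):

#include <stdio.h>

int main(void)
{
	/* -10 MB interpreted as an unsigned 64-bit address */
	unsigned long vsyscall_addr = (unsigned long)(-10L * 1024 * 1024);

	printf("VSYSCALL_ADDR = 0x%lx\n", vsyscall_addr);	/* 0xffffffffff600000 */
	return 0;
}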