summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig23
-rw-r--r--arch/x86/Kconfig.cpu14
-rw-r--r--arch/x86/Kconfig.debug4
-rw-r--r--arch/x86/Makefile_32.cpu7
-rw-r--r--arch/x86/boot/compressed/head_64.S3
-rw-r--r--arch/x86/boot/compressed/relocs.c87
-rw-r--r--arch/x86/boot/compressed/vmlinux.lds.S3
-rw-r--r--arch/x86/boot/header.S2
-rw-r--r--arch/x86/boot/version.c4
-rw-r--r--arch/x86/boot/video.c6
-rw-r--r--arch/x86/crypto/Makefile3
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S517
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_asm.S157
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c333
-rw-r--r--arch/x86/ia32/ia32entry.S5
-rw-r--r--arch/x86/ia32/sys_ia32.c99
-rw-r--r--arch/x86/include/asm/acpi.h3
-rw-r--r--arch/x86/include/asm/amd_iommu_proto.h4
-rw-r--r--arch/x86/include/asm/asm-offsets.h1
-rw-r--r--arch/x86/include/asm/cache.h7
-rw-r--r--arch/x86/include/asm/cacheflush.h2
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/desc_defs.h4
-rw-r--r--arch/x86/include/asm/dma-mapping.h2
-rw-r--r--arch/x86/include/asm/e820.h23
-rw-r--r--arch/x86/include/asm/elf.h21
-rw-r--r--arch/x86/include/asm/entry_arch.h2
-rw-r--r--arch/x86/include/asm/geode.h219
-rw-r--r--arch/x86/include/asm/hardirq.h2
-rw-r--r--arch/x86/include/asm/hpet.h7
-rw-r--r--arch/x86/include/asm/hw_irq.h7
-rw-r--r--arch/x86/include/asm/i387.h7
-rw-r--r--arch/x86/include/asm/inst.h150
-rw-r--r--arch/x86/include/asm/irq.h2
-rw-r--r--arch/x86/include/asm/irq_vectors.h4
-rw-r--r--arch/x86/include/asm/k8.h5
-rw-r--r--arch/x86/include/asm/kvm.h30
-rw-r--r--arch/x86/include/asm/kvm_emulate.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h34
-rw-r--r--arch/x86/include/asm/microcode.h2
-rw-r--r--arch/x86/include/asm/mmzone_32.h2
-rw-r--r--arch/x86/include/asm/mpspec.h11
-rw-r--r--arch/x86/include/asm/msr-index.h2
-rw-r--r--arch/x86/include/asm/msr.h19
-rw-r--r--arch/x86/include/asm/olpc.h2
-rw-r--r--arch/x86/include/asm/page_types.h3
-rw-r--r--arch/x86/include/asm/paravirt.h14
-rw-r--r--arch/x86/include/asm/paravirt_types.h14
-rw-r--r--arch/x86/include/asm/pci_x86.h20
-rw-r--r--arch/x86/include/asm/percpu.h104
-rw-r--r--arch/x86/include/asm/pgtable.h6
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/proto.h17
-rw-r--r--arch/x86/include/asm/ptrace.h2
-rw-r--r--arch/x86/include/asm/sections.h6
-rw-r--r--arch/x86/include/asm/sigcontext.h4
-rw-r--r--arch/x86/include/asm/spinlock.h62
-rw-r--r--arch/x86/include/asm/spinlock_types.h10
-rw-r--r--arch/x86/include/asm/stacktrace.h24
-rw-r--r--arch/x86/include/asm/svm.h3
-rw-r--r--arch/x86/include/asm/swiotlb.h8
-rw-r--r--arch/x86/include/asm/sys_ia32.h9
-rw-r--r--arch/x86/include/asm/syscalls.h34
-rw-r--r--arch/x86/include/asm/system.h1
-rw-r--r--arch/x86/include/asm/thread_info.h7
-rw-r--r--arch/x86/include/asm/topology.h9
-rw-r--r--arch/x86/include/asm/trampoline.h1
-rw-r--r--arch/x86/include/asm/unistd_32.h3
-rw-r--r--arch/x86/include/asm/unistd_64.h2
-rw-r--r--arch/x86/include/asm/uv/bios.h11
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h2
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h44
-rw-r--r--arch/x86/include/asm/vmx.h4
-rw-r--r--arch/x86/include/asm/x86_init.h4
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h27
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/acpi/boot.c3
-rw-r--r--arch/x86/kernel/acpi/cstate.c2
-rw-r--r--arch/x86/kernel/acpi/sleep.c24
-rw-r--r--arch/x86/kernel/amd_iommu.c54
-rw-r--r--arch/x86/kernel/amd_iommu_init.c12
-rw-r--r--arch/x86/kernel/aperture_64.c11
-rw-r--r--arch/x86/kernel/apic/apic.c4
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c5
-rw-r--r--arch/x86/kernel/apic/apic_noop.c2
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c5
-rw-r--r--arch/x86/kernel/apic/es7000_32.c12
-rw-r--r--arch/x86/kernel/apic/io_apic.c85
-rw-r--r--arch/x86/kernel/apic/nmi.c8
-rw-r--r--arch/x86/kernel/apic/numaq_32.c5
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c5
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c5
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c24
-rw-r--r--arch/x86/kernel/bios_uv.c8
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c15
-rw-r--r--arch/x86/kernel/cpu/amd.c55
-rw-r--r--arch/x86/kernel/cpu/common.c18
-rw-r--r--arch/x86/kernel/cpu/cpu_debug.c30
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c45
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longhaul.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k6.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k7.c19
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c32
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-ich.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-lib.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-lib.h24
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-smi.c2
-rw-r--r--arch/x86/kernel/cpu/intel.c9
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c72
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c22
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c11
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c20
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c51
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c11
-rw-r--r--arch/x86/kernel/cpu/perf_event.c34
-rw-r--r--arch/x86/kernel/cpuid.c5
-rw-r--r--arch/x86/kernel/ds.c4
-rw-r--r--arch/x86/kernel/dumpstack.c41
-rw-r--r--arch/x86/kernel/dumpstack.h6
-rw-r--r--arch/x86/kernel/dumpstack_32.c2
-rw-r--r--arch/x86/kernel/dumpstack_64.c37
-rw-r--r--arch/x86/kernel/e820.c13
-rw-r--r--arch/x86/kernel/entry_32.S69
-rw-r--r--arch/x86/kernel/entry_64.S59
-rw-r--r--arch/x86/kernel/ftrace.c17
-rw-r--r--arch/x86/kernel/geode_32.c196
-rw-r--r--arch/x86/kernel/head32.c2
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/head_32.S18
-rw-r--r--arch/x86/kernel/head_64.S3
-rw-r--r--arch/x86/kernel/hpet.c77
-rw-r--r--arch/x86/kernel/hw_breakpoint.c5
-rw-r--r--arch/x86/kernel/ioport.c28
-rw-r--r--arch/x86/kernel/irq.c34
-rw-r--r--arch/x86/kernel/irqinit.c4
-rw-r--r--arch/x86/kernel/kgdb.c14
-rw-r--r--arch/x86/kernel/kprobes.c4
-rw-r--r--arch/x86/kernel/machine_kexec_32.c6
-rw-r--r--arch/x86/kernel/mfgpt_32.c410
-rw-r--r--arch/x86/kernel/microcode_amd.c97
-rw-r--r--arch/x86/kernel/microcode_core.c34
-rw-r--r--arch/x86/kernel/microcode_intel.c47
-rw-r--r--arch/x86/kernel/mpparse.c47
-rw-r--r--arch/x86/kernel/msr.c9
-rw-r--r--arch/x86/kernel/olpc.c4
-rw-r--r--arch/x86/kernel/paravirt-spinlocks.c4
-rw-r--r--arch/x86/kernel/pci-calgary_64.c6
-rw-r--r--arch/x86/kernel/pci-dma.c6
-rw-r--r--arch/x86/kernel/pci-gart_64.c9
-rw-r--r--arch/x86/kernel/pci-swiotlb.c11
-rw-r--r--arch/x86/kernel/process.c93
-rw-r--r--arch/x86/kernel/process_32.c87
-rw-r--r--arch/x86/kernel/process_64.c83
-rw-r--r--arch/x86/kernel/ptrace.c135
-rw-r--r--arch/x86/kernel/quirks.c9
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/reboot_fixups_32.c3
-rw-r--r--arch/x86/kernel/setup.c109
-rw-r--r--arch/x86/kernel/setup_percpu.c13
-rw-r--r--arch/x86/kernel/signal.c15
-rw-r--r--arch/x86/kernel/smpboot.c49
-rw-r--r--arch/x86/kernel/stacktrace.c18
-rw-r--r--arch/x86/kernel/sys_i386_32.c27
-rw-r--r--arch/x86/kernel/sys_x86_64.c17
-rw-r--r--arch/x86/kernel/syscall_table_32.S3
-rw-r--r--arch/x86/kernel/tlb_uv.c4
-rw-r--r--arch/x86/kernel/trampoline.c20
-rw-r--r--arch/x86/kernel/tsc.c1
-rw-r--r--arch/x86/kernel/tsc_sync.c10
-rw-r--r--arch/x86/kernel/uv_irq.c3
-rw-r--r--arch/x86/kernel/uv_time.c80
-rw-r--r--arch/x86/kernel/visws_quirks.c2
-rw-r--r--arch/x86/kernel/vm86_32.c11
-rw-r--r--arch/x86/kernel/vmiclock_32.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S42
-rw-r--r--arch/x86/kernel/vsyscall_64.c7
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c6
-rw-r--r--arch/x86/kernel/x86_init.c2
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/Makefile3
-rw-r--r--arch/x86/kvm/emulate.c159
-rw-r--r--arch/x86/kvm/i8254.c14
-rw-r--r--arch/x86/kvm/i8259.c44
-rw-r--r--arch/x86/kvm/irq.h7
-rw-r--r--arch/x86/kvm/lapic.c8
-rw-r--r--arch/x86/kvm/mmu.c3
-rw-r--r--arch/x86/kvm/paging_tmpl.h1
-rw-r--r--arch/x86/kvm/svm.c395
-rw-r--r--arch/x86/kvm/trace.h165
-rw-r--r--arch/x86/kvm/vmx.c448
-rw-r--r--arch/x86/kvm/x86.c550
-rw-r--r--arch/x86/lib/Makefile8
-rw-r--r--arch/x86/lib/msr-smp.c204
-rw-r--r--arch/x86/lib/msr.c219
-rw-r--r--arch/x86/mm/init.c4
-rw-r--r--arch/x86/mm/init_32.c10
-rw-r--r--arch/x86/mm/init_64.c35
-rw-r--r--arch/x86/mm/ioremap.c26
-rw-r--r--arch/x86/mm/k8topology_64.c101
-rw-r--r--arch/x86/mm/kmmio.c46
-rw-r--r--arch/x86/mm/mmio-mod.c71
-rw-r--r--arch/x86/mm/numa_32.c4
-rw-r--r--arch/x86/mm/numa_64.c252
-rw-r--r--arch/x86/mm/pageattr.c22
-rw-r--r--arch/x86/mm/pat.c23
-rw-r--r--arch/x86/mm/setup_nx.c59
-rw-r--r--arch/x86/mm/srat_32.c2
-rw-r--r--arch/x86/mm/srat_64.c33
-rw-r--r--arch/x86/mm/tlb.c3
-rw-r--r--arch/x86/oprofile/backtrace.c9
-rw-r--r--arch/x86/pci/Makefile5
-rw-r--r--arch/x86/pci/acpi.c74
-rw-r--r--arch/x86/pci/amd_bus.c120
-rw-r--r--arch/x86/pci/bus_numa.c101
-rw-r--r--arch/x86/pci/bus_numa.h27
-rw-r--r--arch/x86/pci/common.c20
-rw-r--r--arch/x86/pci/early.c7
-rw-r--r--arch/x86/pci/i386.c42
-rw-r--r--arch/x86/pci/intel_bus.c90
-rw-r--r--arch/x86/pci/mmconfig-shared.c356
-rw-r--r--arch/x86/pci/mmconfig_32.c16
-rw-r--r--arch/x86/pci/mmconfig_64.c88
-rw-r--r--arch/x86/tools/chkobjdump.awk2
-rw-r--r--arch/x86/tools/gen-insn-attr-x86.awk20
-rw-r--r--arch/x86/tools/test_get_len.c2
-rw-r--r--arch/x86/vdso/vdso32-setup.c1
-rw-r--r--arch/x86/xen/enlighten.c37
-rw-r--r--arch/x86/xen/mmu.c2
-rw-r--r--arch/x86/xen/smp.c44
-rw-r--r--arch/x86/xen/spinlock.c16
-rw-r--r--arch/x86/xen/suspend.c17
-rw-r--r--arch/x86/xen/time.c31
-rw-r--r--arch/x86/xen/xen-asm_64.S4
-rw-r--r--arch/x86/xen/xen-ops.h2
234 files changed, 5196 insertions, 4105 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 178084b4377c..55298e891571 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -50,7 +50,10 @@ config X86
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_LZMA
select HAVE_HW_BREAKPOINT
+ select PERF_EVENTS
+ select ANON_INODES
select HAVE_ARCH_KMEMCHECK
+ select HAVE_USER_RETURN_NOTIFIER
config OUTPUT_FORMAT
string
@@ -1331,7 +1334,9 @@ config MATH_EMULATION
kernel, it won't hurt.
config MTRR
- bool "MTRR (Memory Type Range Register) support"
+ bool
+ default y
+ prompt "MTRR (Memory Type Range Register) support" if EMBEDDED
---help---
On Intel P6 family processors (Pentium Pro, Pentium II and later)
the Memory Type Range Registers (MTRRs) may be used to control
@@ -1397,7 +1402,8 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
config X86_PAT
bool
- prompt "x86 PAT support"
+ default y
+ prompt "x86 PAT support" if EMBEDDED
depends on MTRR
---help---
Use PAT attributes to setup page level cache control.
@@ -1603,7 +1609,7 @@ config COMPAT_VDSO
depends on X86_32 || IA32_EMULATION
---help---
Map the 32-bit VDSO to the predictable old-style address too.
- ---help---
+
Say N here if you are running a sufficiently recent glibc
version (2.3.3 or later), to remove the high-mapped
VDSO mapping and to exclusively use the randomized VDSO.
@@ -2008,18 +2014,9 @@ config SCx200HR_TIMER
processor goes idle (as is done by the scheduler). The
other workaround is idle=poll boot option.
-config GEODE_MFGPT_TIMER
- def_bool y
- prompt "Geode Multi-Function General Purpose Timer (MFGPT) events"
- depends on MGEODE_LX && GENERIC_TIME && GENERIC_CLOCKEVENTS
- ---help---
- This driver provides a clock event source based on the MFGPT
- timer(s) in the CS5535 and CS5536 companion chip for the geode.
- MFGPTs have a better resolution and max interval than the
- generic PIT, and are suitable for use as high-res timers.
-
config OLPC
bool "One Laptop Per Child support"
+ select GPIOLIB
default n
---help---
Add support for detecting the unique features of the OLPC
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 5e99762eb5c2..08e442bc3ab9 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -301,15 +301,11 @@ config X86_CPU
#
# Define implied options from the CPU selection here
-config X86_L1_CACHE_BYTES
+config X86_INTERNODE_CACHE_SHIFT
int
- default "128" if MPSC
- default "64" if GENERIC_CPU || MK8 || MCORE2 || MATOM || X86_32
-
-config X86_INTERNODE_CACHE_BYTES
- int
- default "4096" if X86_VSMP
- default X86_L1_CACHE_BYTES if !X86_VSMP
+ default "12" if X86_VSMP
+ default "7" if NUMA
+ default X86_L1_CACHE_SHIFT
config X86_CMPXCHG
def_bool X86_64 || (X86_32 && !M386)
@@ -317,9 +313,9 @@ config X86_CMPXCHG
config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || MPSC
+ default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
config X86_XADD
def_bool y
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 731318e5ac1d..bc01e3ebfeb2 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -187,8 +187,8 @@ config HAVE_MMIOTRACE_SUPPORT
def_bool y
config X86_DECODER_SELFTEST
- bool "x86 instruction decoder selftest"
- depends on DEBUG_KERNEL
+ bool "x86 instruction decoder selftest"
+ depends on DEBUG_KERNEL && KPROBES
---help---
Perform x86 instruction decoder selftests at build time.
This option is useful for checking the sanity of x86 instruction
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index cbf0776dbec1..1255d953c65d 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -46,6 +46,13 @@ cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx)
# cpu entries
cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686))
+# Work around the pentium-mmx code generator madness of gcc4.4.x which
+# does stack alignment by generating horrible code _before_ the mcount
+# prologue (push %ebp, mov %esp, %ebp) which breaks the function graph
+# tracer assumptions. For i686, generic, core2 this is set by the
+# compiler anyway
+cflags-$(CONFIG_FUNCTION_GRAPH_TRACER) += $(call cc-option,-maccumulate-outgoing-args)
+
# Bug fix for binutils: this option is required in order to keep
# binutils from generating NOPL instructions against our will.
ifneq ($(CONFIG_X86_P6_NOP),y)
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 077e1b69198e..faff0dc9c06a 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -107,8 +107,7 @@ ENTRY(startup_32)
lgdt gdt(%ebp)
/* Enable PAE mode */
- xorl %eax, %eax
- orl $(X86_CR4_PAE), %eax
+ movl $(X86_CR4_PAE), %eax
movl %eax, %cr4
/*
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index bbeb0c3fbd90..89bbf4e4d05d 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -9,6 +9,9 @@
#include <byteswap.h>
#define USE_BSD
#include <endian.h>
+#include <regex.h>
+
+static void die(char *fmt, ...);
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
static Elf32_Ehdr ehdr;
@@ -30,25 +33,47 @@ static struct section *secs;
* the address for which it has been compiled. Don't warn user about
* absolute relocations present w.r.t these symbols.
*/
-static const char* safe_abs_relocs[] = {
- "xen_irq_disable_direct_reloc",
- "xen_save_fl_direct_reloc",
-};
+static const char abs_sym_regex[] =
+ "^(xen_irq_disable_direct_reloc$|"
+ "xen_save_fl_direct_reloc$|"
+ "VDSO|"
+ "__crc_)";
+static regex_t abs_sym_regex_c;
+static int is_abs_reloc(const char *sym_name)
+{
+ return !regexec(&abs_sym_regex_c, sym_name, 0, NULL, 0);
+}
-static int is_safe_abs_reloc(const char* sym_name)
+/*
+ * These symbols are known to be relative, even if the linker marks them
+ * as absolute (typically defined outside any section in the linker script.)
+ */
+static const char rel_sym_regex[] =
+ "^_end$";
+static regex_t rel_sym_regex_c;
+static int is_rel_reloc(const char *sym_name)
{
- int i;
+ return !regexec(&rel_sym_regex_c, sym_name, 0, NULL, 0);
+}
- for (i = 0; i < ARRAY_SIZE(safe_abs_relocs); i++) {
- if (!strcmp(sym_name, safe_abs_relocs[i]))
- /* Match found */
- return 1;
- }
- if (strncmp(sym_name, "VDSO", 4) == 0)
- return 1;
- if (strncmp(sym_name, "__crc_", 6) == 0)
- return 1;
- return 0;
+static void regex_init(void)
+{
+ char errbuf[128];
+ int err;
+
+ err = regcomp(&abs_sym_regex_c, abs_sym_regex,
+ REG_EXTENDED|REG_NOSUB);
+ if (err) {
+ regerror(err, &abs_sym_regex_c, errbuf, sizeof errbuf);
+ die("%s", errbuf);
+ }
+
+ err = regcomp(&rel_sym_regex_c, rel_sym_regex,
+ REG_EXTENDED|REG_NOSUB);
+ if (err) {
+ regerror(err, &rel_sym_regex_c, errbuf, sizeof errbuf);
+ die("%s", errbuf);
+ }
}
static void die(char *fmt, ...)
@@ -131,7 +156,7 @@ static const char *rel_type(unsigned type)
#undef REL_TYPE
};
const char *name = "unknown type rel type name";
- if (type < ARRAY_SIZE(type_name)) {
+ if (type < ARRAY_SIZE(type_name) && type_name[type]) {
name = type_name[type];
}
return name;
@@ -448,7 +473,7 @@ static void print_absolute_relocs(void)
* Before warning check if this absolute symbol
* relocation is harmless.
*/
- if (is_safe_abs_reloc(name))
+ if (is_abs_reloc(name) || is_rel_reloc(name))
continue;
if (!printed) {
@@ -501,21 +526,26 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
sym = &sh_symtab[ELF32_R_SYM(rel->r_info)];
r_type = ELF32_R_TYPE(rel->r_info);
/* Don't visit relocations to absolute symbols */
- if (sym->st_shndx == SHN_ABS) {
+ if (sym->st_shndx == SHN_ABS &&
+ !is_rel_reloc(sym_name(sym_strtab, sym))) {
continue;
}
- if (r_type == R_386_NONE || r_type == R_386_PC32) {
+ switch (r_type) {
+ case R_386_NONE:
+ case R_386_PC32:
/*
* NONE can be ignored and and PC relative
* relocations don't need to be adjusted.
*/
- }
- else if (r_type == R_386_32) {
+ break;
+ case R_386_32:
/* Visit relocations that need to be adjusted */
visit(rel, sym);
- }
- else {
- die("Unsupported relocation type: %d\n", r_type);
+ break;
+ default:
+ die("Unsupported relocation type: %s (%d)\n",
+ rel_type(r_type), r_type);
+ break;
}
}
}
@@ -571,16 +601,15 @@ static void emit_relocs(int as_text)
}
else {
unsigned char buf[4];
- buf[0] = buf[1] = buf[2] = buf[3] = 0;
/* Print a stop */
- printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]);
+ fwrite("\0\0\0\0", 4, 1, stdout);
/* Now print each relocation */
for (i = 0; i < reloc_count; i++) {
buf[0] = (relocs[i] >> 0) & 0xff;
buf[1] = (relocs[i] >> 8) & 0xff;
buf[2] = (relocs[i] >> 16) & 0xff;
buf[3] = (relocs[i] >> 24) & 0xff;
- printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]);
+ fwrite(buf, 4, 1, stdout);
}
}
}
@@ -598,6 +627,8 @@ int main(int argc, char **argv)
FILE *fp;
int i;
+ regex_init();
+
show_absolute_syms = 0;
show_absolute_relocs = 0;
as_text = 0;
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index f4193bb48782..a6f1a59a5b0c 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -4,6 +4,7 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
#undef i386
+#include <asm/cache.h>
#include <asm/page_types.h>
#ifdef CONFIG_X86_64
@@ -46,7 +47,7 @@ SECTIONS
*(.data.*)
_edata = . ;
}
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+ . = ALIGN(L1_CACHE_BYTES);
.bss : {
_bss = . ;
*(.bss)
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index b31cc54b4641..93e689f4bd86 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -16,7 +16,7 @@
*/
#include <asm/segment.h>
-#include <linux/utsrelease.h>
+#include <generated/utsrelease.h>
#include <asm/boot.h>
#include <asm/e820.h>
#include <asm/page_types.h>
diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c
index 2723d9b5ce43..2b15aa488ffb 100644
--- a/arch/x86/boot/version.c
+++ b/arch/x86/boot/version.c
@@ -13,8 +13,8 @@
*/
#include "boot.h"
-#include <linux/utsrelease.h>
-#include <linux/compile.h>
+#include <generated/utsrelease.h>
+#include <generated/compile.h>
const char kernel_version[] =
UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") "
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index d42da3802499..f767164cd5df 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -27,6 +27,12 @@ static void store_cursor_position(void)
boot_params.screen_info.orig_x = oreg.dl;
boot_params.screen_info.orig_y = oreg.dh;
+
+ if (oreg.ch & 0x20)
+ boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR;
+
+ if ((oreg.ch & 0x1f) > (oreg.cl & 0x1f))
+ boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR;
}
static void store_video_mode(void)
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index cfb0010fa940..1a58ad89fdf7 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
+obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
@@ -24,3 +25,5 @@ twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
+
+ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index eb0566e83319..20bb0e1ac681 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -16,6 +16,7 @@
*/
#include <linux/linkage.h>
+#include <asm/inst.h>
.text
@@ -122,103 +123,72 @@ ENTRY(aesni_set_key)
movups 0x10(%rsi), %xmm2 # other user key
movaps %xmm2, (%rcx)
add $0x10, %rcx
- # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
+ AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
call _key_expansion_256a
- # aeskeygenassist $0x1, %xmm0, %xmm1
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
+ AESKEYGENASSIST 0x1 %xmm0 %xmm1
call _key_expansion_256b
- # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
+ AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2
call _key_expansion_256a
- # aeskeygenassist $0x2, %xmm0, %xmm1
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
+ AESKEYGENASSIST 0x2 %xmm0 %xmm1
call _key_expansion_256b
- # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
+ AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3
call _key_expansion_256a
- # aeskeygenassist $0x4, %xmm0, %xmm1
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
+ AESKEYGENASSIST 0x4 %xmm0 %xmm1
call _key_expansion_256b
- # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
+ AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4
call _key_expansion_256a
- # aeskeygenassist $0x8, %xmm0, %xmm1
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
+ AESKEYGENASSIST 0x8 %xmm0 %xmm1
call _key_expansion_256b
- # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
+ AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5
call _key_expansion_256a
- # aeskeygenassist $0x10, %xmm0, %xmm1
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
+ AESKEYGENASSIST 0x10 %xmm0 %xmm1
call _key_expansion_256b
- # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
+ AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6
call _key_expansion_256a
- # aeskeygenassist $0x20, %xmm0, %xmm1
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
+ AESKEYGENASSIST 0x20 %xmm0 %xmm1
call _key_expansion_256b
- # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
+ AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7
call _key_expansion_256a
jmp .Ldec_key
.Lenc_key192:
movq 0x10(%rsi), %xmm2 # other user key
- # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
+ AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
call _key_expansion_192a
- # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
+ AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2
call _key_expansion_192b
- # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
+ AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3
call _key_expansion_192a
- # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
+ AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4
call _key_expansion_192b
- # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
+ AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5
call _key_expansion_192a
- # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
+ AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6
call _key_expansion_192b
- # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
+ AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7
call _key_expansion_192a
- # aeskeygenassist $0x80, %xmm2, %xmm1 # round 8
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80
+ AESKEYGENASSIST 0x80 %xmm2 %xmm1 # round 8
call _key_expansion_192b
jmp .Ldec_key
.Lenc_key128:
- # aeskeygenassist $0x1, %xmm0, %xmm1 # round 1
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
+ AESKEYGENASSIST 0x1 %xmm0 %xmm1 # round 1
call _key_expansion_128
- # aeskeygenassist $0x2, %xmm0, %xmm1 # round 2
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
+ AESKEYGENASSIST 0x2 %xmm0 %xmm1 # round 2
call _key_expansion_128
- # aeskeygenassist $0x4, %xmm0, %xmm1 # round 3
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
+ AESKEYGENASSIST 0x4 %xmm0 %xmm1 # round 3
call _key_expansion_128
- # aeskeygenassist $0x8, %xmm0, %xmm1 # round 4
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
+ AESKEYGENASSIST 0x8 %xmm0 %xmm1 # round 4
call _key_expansion_128
- # aeskeygenassist $0x10, %xmm0, %xmm1 # round 5
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
+ AESKEYGENASSIST 0x10 %xmm0 %xmm1 # round 5
call _key_expansion_128
- # aeskeygenassist $0x20, %xmm0, %xmm1 # round 6
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
+ AESKEYGENASSIST 0x20 %xmm0 %xmm1 # round 6
call _key_expansion_128
- # aeskeygenassist $0x40, %xmm0, %xmm1 # round 7
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40
+ AESKEYGENASSIST 0x40 %xmm0 %xmm1 # round 7
call _key_expansion_128
- # aeskeygenassist $0x80, %xmm0, %xmm1 # round 8
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80
+ AESKEYGENASSIST 0x80 %xmm0 %xmm1 # round 8
call _key_expansion_128
- # aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b
+ AESKEYGENASSIST 0x1b %xmm0 %xmm1 # round 9
call _key_expansion_128
- # aeskeygenassist $0x36, %xmm0, %xmm1 # round 10
- .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36
+ AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10
call _key_expansion_128
.Ldec_key:
sub $0x10, %rcx
@@ -231,8 +201,7 @@ ENTRY(aesni_set_key)
.align 4
.Ldec_key_loop:
movaps (%rdi), %xmm0
- # aesimc %xmm0, %xmm1
- .byte 0x66, 0x0f, 0x38, 0xdb, 0xc8
+ AESIMC %xmm0 %xmm1
movaps %xmm1, (%rsi)
add $0x10, %rdi
sub $0x10, %rsi
@@ -274,51 +243,37 @@ _aesni_enc1:
je .Lenc192
add $0x20, TKEYP
movaps -0x60(TKEYP), KEY
- # aesenc KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ AESENC KEY STATE
movaps -0x50(TKEYP), KEY
- # aesenc KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ AESENC KEY STATE
.align 4
.Lenc192:
movaps -0x40(TKEYP), KEY
- # aesenc KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ AESENC KEY STATE
movaps -0x30(TKEYP), KEY
- # aesenc KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ AESENC KEY STATE
.align 4
.Lenc128:
movaps -0x20(TKEYP), KEY
- # aesenc KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ AESENC KEY STATE
movaps -0x10(TKEYP), KEY
- # aesenc KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ AESENC KEY STATE
movaps (TKEYP), KEY
- # aesenc KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ AESENC KEY STATE
movaps 0x10(TKEYP), KEY
- # aesenc KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ AESENC KEY STATE
movaps 0x20(TKEYP), KEY
- # aesenc KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ AESENC KEY STATE
movaps 0x30(TKEYP), KEY
- # aesenc KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ AESENC KEY STATE
movaps 0x40(TKEYP), KEY
- # aesenc KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ AESENC KEY STATE
movaps 0x50(TKEYP), KEY
- # aesenc KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ AESENC KEY STATE
movaps 0x60(TKEYP), KEY
- # aesenc KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+ AESENC KEY STATE
movaps 0x70(TKEYP), KEY
- # aesenclast KEY, STATE # last round
- .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
+ AESENCLAST KEY STATE
ret
/*
@@ -353,135 +308,79 @@ _aesni_enc4:
je .L4enc192
add $0x20, TKEYP
movaps -0x60(TKEYP), KEY
- # aesenc KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
- # aesenc KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
- # aesenc KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
- # aesenc KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ AESENC KEY STATE1
+ AESENC KEY STATE2
+ AESENC KEY STATE3
+ AESENC KEY STATE4
movaps -0x50(TKEYP), KEY
- # aesenc KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
- # aesenc KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
- # aesenc KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
- # aesenc KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ AESENC KEY STATE1
+ AESENC KEY STATE2
+ AESENC KEY STATE3
+ AESENC KEY STATE4
#.align 4
.L4enc192:
movaps -0x40(TKEYP), KEY
- # aesenc KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
- # aesenc KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
- # aesenc KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
- # aesenc KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ AESENC KEY STATE1
+ AESENC KEY STATE2
+ AESENC KEY STATE3
+ AESENC KEY STATE4
movaps -0x30(TKEYP), KEY
- # aesenc KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
- # aesenc KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
- # aesenc KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
- # aesenc KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ AESENC KEY STATE1
+ AESENC KEY STATE2
+ AESENC KEY STATE3
+ AESENC KEY STATE4
#.align 4
.L4enc128:
movaps -0x20(TKEYP), KEY
- # aesenc KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
- # aesenc KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
- # aesenc KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
- # aesenc KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ AESENC KEY STATE1
+ AESENC KEY STATE2
+ AESENC KEY STATE3
+ AESENC KEY STATE4
movaps -0x10(TKEYP), KEY
- # aesenc KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
- # aesenc KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
- # aesenc KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
- # aesenc KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ AESENC KEY STATE1
+ AESENC KEY STATE2
+ AESENC KEY STATE3
+ AESENC KEY STATE4
movaps (TKEYP), KEY
- # aesenc KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
- # aesenc KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
- # aesenc KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
- # aesenc KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ AESENC KEY STATE1
+ AESENC KEY STATE2
+ AESENC KEY STATE3
+ AESENC KEY STATE4
movaps 0x10(TKEYP), KEY
- # aesenc KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
- # aesenc KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
- # aesenc KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
- # aesenc KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ AESENC KEY STATE1
+ AESENC KEY STATE2
+ AESENC KEY STATE3
+ AESENC KEY STATE4
movaps 0x20(TKEYP), KEY
- # aesenc KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
- # aesenc KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
- # aesenc KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
- # aesenc KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ AESENC KEY STATE1
+ AESENC KEY STATE2
+ AESENC KEY STATE3
+ AESENC KEY STATE4
movaps 0x30(TKEYP), KEY
- # aesenc KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
- # aesenc KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
- # aesenc KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
- # aesenc KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ AESENC KEY STATE1
+ AESENC KEY STATE2
+ AESENC KEY STATE3
+ AESENC KEY STATE4
movaps 0x40(TKEYP), KEY
- # aesenc KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
- # aesenc KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
- # aesenc KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
- # aesenc KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ AESENC KEY STATE1
+ AESENC KEY STATE2
+ AESENC KEY STATE3
+ AESENC KEY STATE4
movaps 0x50(TKEYP), KEY
- # aesenc KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
- # aesenc KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
- # aesenc KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
- # aesenc KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ AESENC KEY STATE1
+ AESENC KEY STATE2
+ AESENC KEY STATE3
+ AESENC KEY STATE4
movaps 0x60(TKEYP), KEY
- # aesenc KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
- # aesenc KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
- # aesenc KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
- # aesenc KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+ AESENC KEY STATE1
+ AESENC KEY STATE2
+ AESENC KEY STATE3
+ AESENC KEY STATE4
movaps 0x70(TKEYP), KEY
- # aesenclast KEY, STATE1 # last round
- .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
- # aesenclast KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xdd, 0xe2
- # aesenclast KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xdd, 0xea
- # aesenclast KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xdd, 0xf2
+ AESENCLAST KEY STATE1 # last round
+ AESENCLAST KEY STATE2
+ AESENCLAST KEY STATE3
+ AESENCLAST KEY STATE4
ret
/*
@@ -518,51 +417,37 @@ _aesni_dec1:
je .Ldec192
add $0x20, TKEYP
movaps -0x60(TKEYP), KEY
- # aesdec KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ AESDEC KEY STATE
movaps -0x50(TKEYP), KEY
- # aesdec KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ AESDEC KEY STATE
.align 4
.Ldec192:
movaps -0x40(TKEYP), KEY
- # aesdec KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ AESDEC KEY STATE
movaps -0x30(TKEYP), KEY
- # aesdec KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ AESDEC KEY STATE
.align 4
.Ldec128:
movaps -0x20(TKEYP), KEY
- # aesdec KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ AESDEC KEY STATE
movaps -0x10(TKEYP), KEY
- # aesdec KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ AESDEC KEY STATE
movaps (TKEYP), KEY
- # aesdec KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ AESDEC KEY STATE
movaps 0x10(TKEYP), KEY
- # aesdec KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ AESDEC KEY STATE
movaps 0x20(TKEYP), KEY
- # aesdec KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ AESDEC KEY STATE
movaps 0x30(TKEYP), KEY
- # aesdec KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ AESDEC KEY STATE
movaps 0x40(TKEYP), KEY
- # aesdec KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ AESDEC KEY STATE
movaps 0x50(TKEYP), KEY
- # aesdec KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ AESDEC KEY STATE
movaps 0x60(TKEYP), KEY
- # aesdec KEY, STATE
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+ AESDEC KEY STATE
movaps 0x70(TKEYP), KEY
- # aesdeclast KEY, STATE # last round
- .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
+ AESDECLAST KEY STATE
ret
/*
@@ -597,135 +482,79 @@ _aesni_dec4:
je .L4dec192
add $0x20, TKEYP
movaps -0x60(TKEYP), KEY
- # aesdec KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
- # aesdec KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
- # aesdec KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xde, 0xea
- # aesdec KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ AESDEC KEY STATE1
+ AESDEC KEY STATE2
+ AESDEC KEY STATE3
+ AESDEC KEY STATE4
movaps -0x50(TKEYP), KEY
- # aesdec KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
- # aesdec KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
- # aesdec KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xde, 0xea
- # aesdec KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ AESDEC KEY STATE1
+ AESDEC KEY STATE2
+ AESDEC KEY STATE3
+ AESDEC KEY STATE4
.align 4
.L4dec192:
movaps -0x40(TKEYP), KEY
- # aesdec KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
- # aesdec KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
- # aesdec KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xde, 0xea
- # aesdec KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ AESDEC KEY STATE1
+ AESDEC KEY STATE2
+ AESDEC KEY STATE3
+ AESDEC KEY STATE4
movaps -0x30(TKEYP), KEY
- # aesdec KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
- # aesdec KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
- # aesdec KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xde, 0xea
- # aesdec KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ AESDEC KEY STATE1
+ AESDEC KEY STATE2
+ AESDEC KEY STATE3
+ AESDEC KEY STATE4
.align 4
.L4dec128:
movaps -0x20(TKEYP), KEY
- # aesdec KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
- # aesdec KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
- # aesdec KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xde, 0xea
- # aesdec KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ AESDEC KEY STATE1
+ AESDEC KEY STATE2
+ AESDEC KEY STATE3
+ AESDEC KEY STATE4
movaps -0x10(TKEYP), KEY
- # aesdec KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
- # aesdec KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
- # aesdec KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xde, 0xea
- # aesdec KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ AESDEC KEY STATE1
+ AESDEC KEY STATE2
+ AESDEC KEY STATE3
+ AESDEC KEY STATE4
movaps (TKEYP), KEY
- # aesdec KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
- # aesdec KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
- # aesdec KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xde, 0xea
- # aesdec KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ AESDEC KEY STATE1
+ AESDEC KEY STATE2
+ AESDEC KEY STATE3
+ AESDEC KEY STATE4
movaps 0x10(TKEYP), KEY
- # aesdec KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
- # aesdec KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
- # aesdec KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xde, 0xea
- # aesdec KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ AESDEC KEY STATE1
+ AESDEC KEY STATE2
+ AESDEC KEY STATE3
+ AESDEC KEY STATE4
movaps 0x20(TKEYP), KEY
- # aesdec KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
- # aesdec KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
- # aesdec KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xde, 0xea
- # aesdec KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ AESDEC KEY STATE1
+ AESDEC KEY STATE2
+ AESDEC KEY STATE3
+ AESDEC KEY STATE4
movaps 0x30(TKEYP), KEY
- # aesdec KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
- # aesdec KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
- # aesdec KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xde, 0xea
- # aesdec KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ AESDEC KEY STATE1
+ AESDEC KEY STATE2
+ AESDEC KEY STATE3
+ AESDEC KEY STATE4
movaps 0x40(TKEYP), KEY
- # aesdec KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
- # aesdec KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
- # aesdec KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xde, 0xea
- # aesdec KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ AESDEC KEY STATE1
+ AESDEC KEY STATE2
+ AESDEC KEY STATE3
+ AESDEC KEY STATE4
movaps 0x50(TKEYP), KEY
- # aesdec KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
- # aesdec KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
- # aesdec KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xde, 0xea
- # aesdec KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ AESDEC KEY STATE1
+ AESDEC KEY STATE2
+ AESDEC KEY STATE3
+ AESDEC KEY STATE4
movaps 0x60(TKEYP), KEY
- # aesdec KEY, STATE1
- .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
- # aesdec KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
- # aesdec KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xde, 0xea
- # aesdec KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+ AESDEC KEY STATE1
+ AESDEC KEY STATE2
+ AESDEC KEY STATE3
+ AESDEC KEY STATE4
movaps 0x70(TKEYP), KEY
- # aesdeclast KEY, STATE1 # last round
- .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
- # aesdeclast KEY, STATE2
- .byte 0x66, 0x0f, 0x38, 0xdf, 0xe2
- # aesdeclast KEY, STATE3
- .byte 0x66, 0x0f, 0x38, 0xdf, 0xea
- # aesdeclast KEY, STATE4
- .byte 0x66, 0x0f, 0x38, 0xdf, 0xf2
+ AESDECLAST KEY STATE1 # last round
+ AESDECLAST KEY STATE2
+ AESDECLAST KEY STATE3
+ AESDECLAST KEY STATE4
ret
/*
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
new file mode 100644
index 000000000000..1eb7f90cb7b9
--- /dev/null
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -0,0 +1,157 @@
+/*
+ * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
+ * instructions. This file contains accelerated part of ghash
+ * implementation. More information about PCLMULQDQ can be found at:
+ *
+ * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
+ *
+ * Copyright (c) 2009 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ * Vinodh Gopal
+ * Erdinc Ozturk
+ * Deniz Karakoyunlu
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/inst.h>
+
+.data
+
+.align 16
+.Lbswap_mask:
+ .octa 0x000102030405060708090a0b0c0d0e0f
+.Lpoly:
+ .octa 0xc2000000000000000000000000000001
+.Ltwo_one:
+ .octa 0x00000001000000000000000000000001
+
+#define DATA %xmm0
+#define SHASH %xmm1
+#define T1 %xmm2
+#define T2 %xmm3
+#define T3 %xmm4
+#define BSWAP %xmm5
+#define IN1 %xmm6
+
+.text
+
+/*
+ * __clmul_gf128mul_ble: internal ABI
+ * input:
+ * DATA: operand1
+ * SHASH: operand2, hash_key << 1 mod poly
+ * output:
+ * DATA: operand1 * operand2 mod poly
+ * changed:
+ * T1
+ * T2
+ * T3
+ */
+__clmul_gf128mul_ble:
+ movaps DATA, T1
+ pshufd $0b01001110, DATA, T2
+ pshufd $0b01001110, SHASH, T3
+ pxor DATA, T2
+ pxor SHASH, T3
+
+ PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0
+ PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1
+ PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0)
+ pxor DATA, T2
+ pxor T1, T2 # T2 = a0 * b1 + a1 * b0
+
+ movaps T2, T3
+ pslldq $8, T3
+ psrldq $8, T2
+ pxor T3, DATA
+ pxor T2, T1 # <T1:DATA> is result of
+ # carry-less multiplication
+
+ # first phase of the reduction
+ movaps DATA, T3
+ psllq $1, T3
+ pxor DATA, T3
+ psllq $5, T3
+ pxor DATA, T3
+ psllq $57, T3
+ movaps T3, T2
+ pslldq $8, T2
+ psrldq $8, T3
+ pxor T2, DATA
+ pxor T3, T1
+
+ # second phase of the reduction
+ movaps DATA, T2
+ psrlq $5, T2
+ pxor DATA, T2
+ psrlq $1, T2
+ pxor DATA, T2
+ psrlq $1, T2
+ pxor T2, T1
+ pxor T1, DATA
+ ret
+
+/* void clmul_ghash_mul(char *dst, const be128 *shash) */
+ENTRY(clmul_ghash_mul)
+ movups (%rdi), DATA
+ movups (%rsi), SHASH
+ movaps .Lbswap_mask, BSWAP
+ PSHUFB_XMM BSWAP DATA
+ call __clmul_gf128mul_ble
+ PSHUFB_XMM BSWAP DATA
+ movups DATA, (%rdi)
+ ret
+
+/*
+ * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
+ * const be128 *shash);
+ */
+ENTRY(clmul_ghash_update)
+ cmp $16, %rdx
+ jb .Lupdate_just_ret # check length
+ movaps .Lbswap_mask, BSWAP
+ movups (%rdi), DATA
+ movups (%rcx), SHASH
+ PSHUFB_XMM BSWAP DATA
+.align 4
+.Lupdate_loop:
+ movups (%rsi), IN1
+ PSHUFB_XMM BSWAP IN1
+ pxor IN1, DATA
+ call __clmul_gf128mul_ble
+ sub $16, %rdx
+ add $16, %rsi
+ cmp $16, %rdx
+ jge .Lupdate_loop
+ PSHUFB_XMM BSWAP DATA
+ movups DATA, (%rdi)
+.Lupdate_just_ret:
+ ret
+
+/*
+ * void clmul_ghash_setkey(be128 *shash, const u8 *key);
+ *
+ * Calculate hash_key << 1 mod poly
+ */
+ENTRY(clmul_ghash_setkey)
+ movaps .Lbswap_mask, BSWAP
+ movups (%rsi), %xmm0
+ PSHUFB_XMM BSWAP %xmm0
+ movaps %xmm0, %xmm1
+ psllq $1, %xmm0
+ psrlq $63, %xmm1
+ movaps %xmm1, %xmm2
+ pslldq $8, %xmm1
+ psrldq $8, %xmm2
+ por %xmm1, %xmm0
+ # reduction
+ pshufd $0b00100100, %xmm2, %xmm1
+ pcmpeqd .Ltwo_one, %xmm1
+ pand .Lpoly, %xmm1
+ pxor %xmm1, %xmm0
+ movups %xmm0, (%rdi)
+ ret
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
new file mode 100644
index 000000000000..cbcc8d8ea93a
--- /dev/null
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -0,0 +1,333 @@
+/*
+ * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
+ * instructions. This file contains glue code.
+ *
+ * Copyright (c) 2009 Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/cryptd.h>
+#include <crypto/gf128mul.h>
+#include <crypto/internal/hash.h>
+#include <asm/i387.h>
+
+#define GHASH_BLOCK_SIZE 16
+#define GHASH_DIGEST_SIZE 16
+
+void clmul_ghash_mul(char *dst, const be128 *shash);
+
+void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
+ const be128 *shash);
+
+void clmul_ghash_setkey(be128 *shash, const u8 *key);
+
+struct ghash_async_ctx {
+ struct cryptd_ahash *cryptd_tfm;
+};
+
+struct ghash_ctx {
+ be128 shash;
+};
+
+struct ghash_desc_ctx {
+ u8 buffer[GHASH_BLOCK_SIZE];
+ u32 bytes;
+};
+
+static int ghash_init(struct shash_desc *desc)
+{
+ struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ memset(dctx, 0, sizeof(*dctx));
+
+ return 0;
+}
+
+static int ghash_setkey(struct crypto_shash *tfm,
+ const u8 *key, unsigned int keylen)
+{
+ struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+
+ if (keylen != GHASH_BLOCK_SIZE) {
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ clmul_ghash_setkey(&ctx->shash, key);
+
+ return 0;
+}
+
+static int ghash_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+ u8 *dst = dctx->buffer;
+
+ kernel_fpu_begin();
+ if (dctx->bytes) {
+ int n = min(srclen, dctx->bytes);
+ u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
+
+ dctx->bytes -= n;
+ srclen -= n;
+
+ while (n--)
+ *pos++ ^= *src++;
+
+ if (!dctx->bytes)
+ clmul_ghash_mul(dst, &ctx->shash);
+ }
+
+ clmul_ghash_update(dst, src, srclen, &ctx->shash);
+ kernel_fpu_end();
+
+ if (srclen & 0xf) {
+ src += srclen - (srclen & 0xf);
+ srclen &= 0xf;
+ dctx->bytes = GHASH_BLOCK_SIZE - srclen;
+ while (srclen--)
+ *dst++ ^= *src++;
+ }
+
+ return 0;
+}
+
+static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
+{
+ u8 *dst = dctx->buffer;
+
+ if (dctx->bytes) {
+ u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
+
+ while (dctx->bytes--)
+ *tmp++ ^= 0;
+
+ kernel_fpu_begin();
+ clmul_ghash_mul(dst, &ctx->shash);
+ kernel_fpu_end();
+ }
+
+ dctx->bytes = 0;
+}
+
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+ struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+ struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+ u8 *buf = dctx->buffer;
+
+ ghash_flush(ctx, dctx);
+ memcpy(dst, buf, GHASH_BLOCK_SIZE);
+
+ return 0;
+}
+
+static struct shash_alg ghash_alg = {
+ .digestsize = GHASH_DIGEST_SIZE,
+ .init = ghash_init,
+ .update = ghash_update,
+ .final = ghash_final,
+ .setkey = ghash_setkey,
+ .descsize = sizeof(struct ghash_desc_ctx),
+ .base = {
+ .cra_name = "__ghash",
+ .cra_driver_name = "__ghash-pclmulqdqni",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = GHASH_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct ghash_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list),
+ },
+};
+
+static int ghash_async_init(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct ahash_request *cryptd_req = ahash_request_ctx(req);
+ struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+
+ if (!irq_fpu_usable()) {
+ memcpy(cryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
+ return crypto_ahash_init(cryptd_req);
+ } else {
+ struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+ struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
+
+ desc->tfm = child;
+ desc->flags = req->base.flags;
+ return crypto_shash_init(desc);
+ }
+}
+
+static int ghash_async_update(struct ahash_request *req)
+{
+ struct ahash_request *cryptd_req = ahash_request_ctx(req);
+
+ if (!irq_fpu_usable()) {
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+
+ memcpy(cryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
+ return crypto_ahash_update(cryptd_req);
+ } else {
+ struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+ return shash_ahash_update(req, desc);
+ }
+}
+
+static int ghash_async_final(struct ahash_request *req)
+{
+ struct ahash_request *cryptd_req = ahash_request_ctx(req);
+
+ if (!irq_fpu_usable()) {
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+
+ memcpy(cryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
+ return crypto_ahash_final(cryptd_req);
+ } else {
+ struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+ return crypto_shash_final(desc, req->result);
+ }
+}
+
+static int ghash_async_digest(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct ahash_request *cryptd_req = ahash_request_ctx(req);
+ struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+
+ if (!irq_fpu_usable()) {
+ memcpy(cryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
+ return crypto_ahash_digest(cryptd_req);
+ } else {
+ struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+ struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
+
+ desc->tfm = child;
+ desc->flags = req->base.flags;
+ return shash_ahash_digest(req, desc);
+ }
+}
+
+static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct crypto_ahash *child = &ctx->cryptd_tfm->base;
+ int err;
+
+ crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+ crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
+ & CRYPTO_TFM_REQ_MASK);
+ err = crypto_ahash_setkey(child, key, keylen);
+ crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child)
+ & CRYPTO_TFM_RES_MASK);
+
+ return 0;
+}
+
+static int ghash_async_init_tfm(struct crypto_tfm *tfm)
+{
+ struct cryptd_ahash *cryptd_tfm;
+ struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
+ ctx->cryptd_tfm = cryptd_tfm;
+ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+ sizeof(struct ahash_request) +
+ crypto_ahash_reqsize(&cryptd_tfm->base));
+
+ return 0;
+}
+
+static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
+{
+ struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ cryptd_free_ahash(ctx->cryptd_tfm);
+}
+
+static struct ahash_alg ghash_async_alg = {
+ .init = ghash_async_init,
+ .update = ghash_async_update,
+ .final = ghash_async_final,
+ .setkey = ghash_async_setkey,
+ .digest = ghash_async_digest,
+ .halg = {
+ .digestsize = GHASH_DIGEST_SIZE,
+ .base = {
+ .cra_name = "ghash",
+ .cra_driver_name = "ghash-clmulni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = GHASH_BLOCK_SIZE,
+ .cra_type = &crypto_ahash_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list),
+ .cra_init = ghash_async_init_tfm,
+ .cra_exit = ghash_async_exit_tfm,
+ },
+ },
+};
+
+static int __init ghash_pclmulqdqni_mod_init(void)
+{
+ int err;
+
+ if (!cpu_has_pclmulqdq) {
+ printk(KERN_INFO "Intel PCLMULQDQ-NI instructions are not"
+ " detected.\n");
+ return -ENODEV;
+ }
+
+ err = crypto_register_shash(&ghash_alg);
+ if (err)
+ goto err_out;
+ err = crypto_register_ahash(&ghash_async_alg);
+ if (err)
+ goto err_shash;
+
+ return 0;
+
+err_shash:
+ crypto_unregister_shash(&ghash_alg);
+err_out:
+ return err;
+}
+
+static void __exit ghash_pclmulqdqni_mod_exit(void)
+{
+ crypto_unregister_ahash(&ghash_async_alg);
+ crypto_unregister_shash(&ghash_alg);
+}
+
+module_init(ghash_pclmulqdqni_mod_init);
+module_exit(ghash_pclmulqdqni_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("GHASH Message Digest Algorithm, "
+ "acclerated by PCLMULQDQ-NI");
+MODULE_ALIAS("ghash");
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 581b0568fe19..53147ad85b96 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -653,7 +653,7 @@ ia32_sys_call_table:
.quad compat_sys_writev
.quad sys_getsid
.quad sys_fdatasync
- .quad sys32_sysctl /* sysctl */
+ .quad compat_sys_sysctl /* sysctl */
.quad sys_mlock /* 150 */
.quad sys_munlock
.quad sys_mlockall
@@ -696,7 +696,7 @@ ia32_sys_call_table:
.quad quiet_ni_syscall /* streams2 */
.quad stub32_vfork /* 190 */
.quad compat_sys_getrlimit
- .quad sys32_mmap2
+ .quad sys_mmap_pgoff
.quad sys32_truncate64
.quad sys32_ftruncate64
.quad sys32_stat64 /* 195 */
@@ -841,4 +841,5 @@ ia32_sys_call_table:
.quad compat_sys_pwritev
.quad compat_sys_rt_tgsigqueueinfo /* 335 */
.quad sys_perf_event_open
+ .quad compat_sys_recvmmsg
ia32_syscall_end:
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 9f5527198825..422572c77923 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -155,9 +155,6 @@ struct mmap_arg_struct {
asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg)
{
struct mmap_arg_struct a;
- struct file *file = NULL;
- unsigned long retval;
- struct mm_struct *mm ;
if (copy_from_user(&a, arg, sizeof(a)))
return -EFAULT;
@@ -165,22 +162,8 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg)
if (a.offset & ~PAGE_MASK)
return -EINVAL;
- if (!(a.flags & MAP_ANONYMOUS)) {
- file = fget(a.fd);
- if (!file)
- return -EBADF;
- }
-
- mm = current->mm;
- down_write(&mm->mmap_sem);
- retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags,
+ return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
a.offset>>PAGE_SHIFT);
- if (file)
- fput(file);
-
- up_write(&mm->mmap_sem);
-
- return retval;
}
asmlinkage long sys32_mprotect(unsigned long start, size_t len,
@@ -434,62 +417,6 @@ asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig,
return ret;
}
-#ifdef CONFIG_SYSCTL_SYSCALL
-struct sysctl_ia32 {
- unsigned int name;
- int nlen;
- unsigned int oldval;
- unsigned int oldlenp;
- unsigned int newval;
- unsigned int newlen;
- unsigned int __unused[4];
-};
-
-
-asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *args32)
-{
- struct sysctl_ia32 a32;
- mm_segment_t old_fs = get_fs();
- void __user *oldvalp, *newvalp;
- size_t oldlen;
- int __user *namep;
- long ret;
-
- if (copy_from_user(&a32, args32, sizeof(a32)))
- return -EFAULT;
-
- /*
- * We need to pre-validate these because we have to disable
- * address checking before calling do_sysctl() because of
- * OLDLEN but we can't run the risk of the user specifying bad
- * addresses here. Well, since we're dealing with 32 bit
- * addresses, we KNOW that access_ok() will always succeed, so
- * this is an expensive NOP, but so what...
- */
- namep = compat_ptr(a32.name);
- oldvalp = compat_ptr(a32.oldval);
- newvalp = compat_ptr(a32.newval);
-
- if ((oldvalp && get_user(oldlen, (int __user *)compat_ptr(a32.oldlenp)))
- || !access_ok(VERIFY_WRITE, namep, 0)
- || !access_ok(VERIFY_WRITE, oldvalp, 0)
- || !access_ok(VERIFY_WRITE, newvalp, 0))
- return -EFAULT;
-
- set_fs(KERNEL_DS);
- lock_kernel();
- ret = do_sysctl(namep, a32.nlen, oldvalp, (size_t __user *)&oldlen,
- newvalp, (size_t) a32.newlen);
- unlock_kernel();
- set_fs(old_fs);
-
- if (oldvalp && put_user(oldlen, (int __user *)compat_ptr(a32.oldlenp)))
- return -EFAULT;
-
- return ret;
-}
-#endif
-
/* warning: next two assume little endian */
asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count,
u32 poslo, u32 poshi)
@@ -539,30 +466,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd,
return ret;
}
-asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff)
-{
- struct mm_struct *mm = current->mm;
- unsigned long error;
- struct file *file = NULL;
-
- flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
- if (!(flags & MAP_ANONYMOUS)) {
- file = fget(fd);
- if (!file)
- return -EBADF;
- }
-
- down_write(&mm->mmap_sem);
- error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
- up_write(&mm->mmap_sem);
-
- if (file)
- fput(file);
- return error;
-}
-
asmlinkage long sys32_olduname(struct oldold_utsname __user *name)
{
char *arch = "x86_64";
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 4518dc500903..60d2b2db0bc5 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -118,7 +118,7 @@ extern void acpi_restore_state_mem(void);
extern unsigned long acpi_wakeup_address;
/* early initialization routine */
-extern void acpi_reserve_bootmem(void);
+extern void acpi_reserve_wakeup_memory(void);
/*
* Check if the CPU can handle C2 and deeper
@@ -158,6 +158,7 @@ struct bootnode;
#ifdef CONFIG_ACPI_NUMA
extern int acpi_numa;
+extern int acpi_get_nodes(struct bootnode *physnodes);
extern int acpi_scan_nodes(unsigned long start, unsigned long end);
#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index 84786fb9a23b..4d817f9e6e77 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -28,7 +28,9 @@ extern void amd_iommu_flush_all_domains(void);
extern void amd_iommu_flush_all_devices(void);
extern void amd_iommu_apply_erratum_63(u16 devid);
extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
-
+extern int amd_iommu_init_devices(void);
+extern void amd_iommu_uninit_devices(void);
+extern void amd_iommu_init_notifier(void);
#ifndef CONFIG_AMD_IOMMU_STATS
static inline void amd_iommu_stats_init(void) { }
diff --git a/arch/x86/include/asm/asm-offsets.h b/arch/x86/include/asm/asm-offsets.h
new file mode 100644
index 000000000000..d370ee36a182
--- /dev/null
+++ b/arch/x86/include/asm/asm-offsets.h
@@ -0,0 +1 @@
+#include <generated/asm-offsets.h>
diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h
index 549860d3be8f..2f9047cfaaca 100644
--- a/arch/x86/include/asm/cache.h
+++ b/arch/x86/include/asm/cache.h
@@ -9,12 +9,13 @@
#define __read_mostly __attribute__((__section__(".data.read_mostly")))
+#define INTERNODE_CACHE_SHIFT CONFIG_X86_INTERNODE_CACHE_SHIFT
+#define INTERNODE_CACHE_BYTES (1 << INTERNODE_CACHE_SHIFT)
+
#ifdef CONFIG_X86_VSMP
-/* vSMP Internode cacheline shift */
-#define INTERNODE_CACHE_SHIFT (12)
#ifdef CONFIG_SMP
#define __cacheline_aligned_in_smp \
- __attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT)))) \
+ __attribute__((__aligned__(INTERNODE_CACHE_BYTES))) \
__page_aligned_data
#endif
#endif
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index b54f6afe7ec4..634c40a739a6 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -12,6 +12,7 @@ static inline void flush_cache_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end) { }
static inline void flush_cache_page(struct vm_area_struct *vma,
unsigned long vmaddr, unsigned long pfn) { }
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
static inline void flush_dcache_page(struct page *page) { }
static inline void flush_dcache_mmap_lock(struct address_space *mapping) { }
static inline void flush_dcache_mmap_unlock(struct address_space *mapping) { }
@@ -176,6 +177,7 @@ void clflush_cache_range(void *addr, unsigned int size);
#ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void);
extern const int rodata_test_data;
+extern int kernel_set_to_readonly;
void set_kernel_text_rw(void);
void set_kernel_text_ro(void);
#else
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 9cfc88b97742..637e1ec963c3 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -153,6 +153,7 @@
#define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */
#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */
#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */
+#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -248,6 +249,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
+#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
# define cpu_has_invlpg 1
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index 9d6684849fd9..278441f39856 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -12,9 +12,9 @@
#include <linux/types.h>
/*
- * FIXME: Acessing the desc_struct through its fields is more elegant,
+ * FIXME: Accessing the desc_struct through its fields is more elegant,
* and should be the one valid thing to do. However, a lot of open code
- * still touches the a and b acessors, and doing this allow us to do it
+ * still touches the a and b accessors, and doing this allow us to do it
* incrementally. We keep the signature as a struct, rather than an union,
* so we can get rid of it transparently in the future -- glommer
*/
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 0f6c02f3b7d4..ac91eed21061 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -67,7 +67,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
if (!dev->dma_mask)
return 0;
- return addr + size <= *dev->dma_mask;
+ return addr + size - 1 <= *dev->dma_mask;
}
static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 40b4e614fe71..761249e396fe 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -61,6 +61,12 @@ struct e820map {
struct e820entry map[E820_X_MAX];
};
+#define ISA_START_ADDRESS 0xa0000
+#define ISA_END_ADDRESS 0x100000
+
+#define BIOS_BEGIN 0x000a0000
+#define BIOS_END 0x00100000
+
#ifdef __KERNEL__
/* see comment in arch/x86/kernel/e820.c */
extern struct e820map e820;
@@ -126,15 +132,18 @@ extern void e820_reserve_resources(void);
extern void e820_reserve_resources_late(void);
extern void setup_memory_map(void);
extern char *default_machine_specific_memory_setup(void);
-#endif /* __KERNEL__ */
-#endif /* __ASSEMBLY__ */
-#define ISA_START_ADDRESS 0xa0000
-#define ISA_END_ADDRESS 0x100000
-#define is_ISA_range(s, e) ((s) >= ISA_START_ADDRESS && (e) < ISA_END_ADDRESS)
+/*
+ * Returns true iff the specified range [s,e) is completely contained inside
+ * the ISA region.
+ */
+static inline bool is_ISA_range(u64 s, u64 e)
+{
+ return s >= ISA_START_ADDRESS && e <= ISA_END_ADDRESS;
+}
-#define BIOS_BEGIN 0x000a0000
-#define BIOS_END 0x00100000
+#endif /* __KERNEL__ */
+#endif /* __ASSEMBLY__ */
#ifdef __KERNEL__
#include <linux/ioport.h>
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 456a304b8172..b4501ee223ad 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -157,19 +157,6 @@ do { \
#define compat_elf_check_arch(x) elf_check_arch_ia32(x)
-static inline void start_ia32_thread(struct pt_regs *regs, u32 ip, u32 sp)
-{
- loadsegment(fs, 0);
- loadsegment(ds, __USER32_DS);
- loadsegment(es, __USER32_DS);
- load_gs_index(0);
- regs->ip = ip;
- regs->sp = sp;
- regs->flags = X86_EFLAGS_IF;
- regs->cs = __USER32_CS;
- regs->ss = __USER32_DS;
-}
-
static inline void elf_common_init(struct thread_struct *t,
struct pt_regs *regs, const u16 ds)
{
@@ -191,11 +178,8 @@ do { \
#define COMPAT_ELF_PLAT_INIT(regs, load_addr) \
elf_common_init(&current->thread, regs, __USER_DS)
-#define compat_start_thread(regs, ip, sp) \
-do { \
- start_ia32_thread(regs, ip, sp); \
- set_fs(USER_DS); \
-} while (0)
+void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp);
+#define compat_start_thread start_thread_ia32
#define COMPAT_SET_PERSONALITY(ex) \
do { \
@@ -255,7 +239,6 @@ extern int force_personality32;
#endif /* !CONFIG_X86_32 */
#define CORE_DUMP_USE_REGSET
-#define USE_ELF_CORE_DUMP
#define ELF_EXEC_PAGESIZE 4096
/* This is the location that an ET_DYN program is loaded if exec'ed. Typical
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index f5693c81a1db..8e8ec663a98f 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -34,7 +34,7 @@ BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7,
smp_invalidate_interrupt)
#endif
-BUILD_INTERRUPT(generic_interrupt, GENERIC_INTERRUPT_VECTOR)
+BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
/*
* every pentium local APIC has two 'local interrupts', with a
diff --git a/arch/x86/include/asm/geode.h b/arch/x86/include/asm/geode.h
index ad3c2ed75481..7cd73552a4e8 100644
--- a/arch/x86/include/asm/geode.h
+++ b/arch/x86/include/asm/geode.h
@@ -12,160 +12,7 @@
#include <asm/processor.h>
#include <linux/io.h>
-
-/* Generic southbridge functions */
-
-#define GEODE_DEV_PMS 0
-#define GEODE_DEV_ACPI 1
-#define GEODE_DEV_GPIO 2
-#define GEODE_DEV_MFGPT 3
-
-extern int geode_get_dev_base(unsigned int dev);
-
-/* Useful macros */
-#define geode_pms_base() geode_get_dev_base(GEODE_DEV_PMS)
-#define geode_acpi_base() geode_get_dev_base(GEODE_DEV_ACPI)
-#define geode_gpio_base() geode_get_dev_base(GEODE_DEV_GPIO)
-#define geode_mfgpt_base() geode_get_dev_base(GEODE_DEV_MFGPT)
-
-/* MSRS */
-
-#define MSR_GLIU_P2D_RO0 0x10000029
-
-#define MSR_LX_GLD_MSR_CONFIG 0x48002001
-#define MSR_LX_MSR_PADSEL 0x48002011 /* NOT 0x48000011; the data
- * sheet has the wrong value */
-#define MSR_GLCP_SYS_RSTPLL 0x4C000014
-#define MSR_GLCP_DOTPLL 0x4C000015
-
-#define MSR_LBAR_SMB 0x5140000B
-#define MSR_LBAR_GPIO 0x5140000C
-#define MSR_LBAR_MFGPT 0x5140000D
-#define MSR_LBAR_ACPI 0x5140000E
-#define MSR_LBAR_PMS 0x5140000F
-
-#define MSR_DIVIL_SOFT_RESET 0x51400017
-
-#define MSR_PIC_YSEL_LOW 0x51400020
-#define MSR_PIC_YSEL_HIGH 0x51400021
-#define MSR_PIC_ZSEL_LOW 0x51400022
-#define MSR_PIC_ZSEL_HIGH 0x51400023
-#define MSR_PIC_IRQM_LPC 0x51400025
-
-#define MSR_MFGPT_IRQ 0x51400028
-#define MSR_MFGPT_NR 0x51400029
-#define MSR_MFGPT_SETUP 0x5140002B
-
-#define MSR_LX_SPARE_MSR 0x80000011 /* DC-specific */
-
-#define MSR_GX_GLD_MSR_CONFIG 0xC0002001
-#define MSR_GX_MSR_PADSEL 0xC0002011
-
-/* Resource Sizes */
-
-#define LBAR_GPIO_SIZE 0xFF
-#define LBAR_MFGPT_SIZE 0x40
-#define LBAR_ACPI_SIZE 0x40
-#define LBAR_PMS_SIZE 0x80
-
-/* ACPI registers (PMS block) */
-
-/*
- * PM1_EN is only valid when VSA is enabled for 16 bit reads.
- * When VSA is not enabled, *always* read both PM1_STS and PM1_EN
- * with a 32 bit read at offset 0x0
- */
-
-#define PM1_STS 0x00
-#define PM1_EN 0x02
-#define PM1_CNT 0x08
-#define PM2_CNT 0x0C
-#define PM_TMR 0x10
-#define PM_GPE0_STS 0x18
-#define PM_GPE0_EN 0x1C
-
-/* PMC registers (PMS block) */
-
-#define PM_SSD 0x00
-#define PM_SCXA 0x04
-#define PM_SCYA 0x08
-#define PM_OUT_SLPCTL 0x0C
-#define PM_SCLK 0x10
-#define PM_SED 0x1
-#define PM_SCXD 0x18
-#define PM_SCYD 0x1C
-#define PM_IN_SLPCTL 0x20
-#define PM_WKD 0x30
-#define PM_WKXD 0x34
-#define PM_RD 0x38
-#define PM_WKXA 0x3C
-#define PM_FSD 0x40
-#define PM_TSD 0x44
-#define PM_PSD 0x48
-#define PM_NWKD 0x4C
-#define PM_AWKD 0x50
-#define PM_SSC 0x54
-
-/* VSA2 magic values */
-
-#define VSA_VRC_INDEX 0xAC1C
-#define VSA_VRC_DATA 0xAC1E
-#define VSA_VR_UNLOCK 0xFC53 /* unlock virtual register */
-#define VSA_VR_SIGNATURE 0x0003
-#define VSA_VR_MEM_SIZE 0x0200
-#define AMD_VSA_SIG 0x4132 /* signature is ascii 'VSA2' */
-#define GSW_VSA_SIG 0x534d /* General Software signature */
-/* GPIO */
-
-#define GPIO_OUTPUT_VAL 0x00
-#define GPIO_OUTPUT_ENABLE 0x04
-#define GPIO_OUTPUT_OPEN_DRAIN 0x08
-#define GPIO_OUTPUT_INVERT 0x0C
-#define GPIO_OUTPUT_AUX1 0x10
-#define GPIO_OUTPUT_AUX2 0x14
-#define GPIO_PULL_UP 0x18
-#define GPIO_PULL_DOWN 0x1C
-#define GPIO_INPUT_ENABLE 0x20
-#define GPIO_INPUT_INVERT 0x24
-#define GPIO_INPUT_FILTER 0x28
-#define GPIO_INPUT_EVENT_COUNT 0x2C
-#define GPIO_READ_BACK 0x30
-#define GPIO_INPUT_AUX1 0x34
-#define GPIO_EVENTS_ENABLE 0x38
-#define GPIO_LOCK_ENABLE 0x3C
-#define GPIO_POSITIVE_EDGE_EN 0x40
-#define GPIO_NEGATIVE_EDGE_EN 0x44
-#define GPIO_POSITIVE_EDGE_STS 0x48
-#define GPIO_NEGATIVE_EDGE_STS 0x4C
-
-#define GPIO_MAP_X 0xE0
-#define GPIO_MAP_Y 0xE4
-#define GPIO_MAP_Z 0xE8
-#define GPIO_MAP_W 0xEC
-
-static inline u32 geode_gpio(unsigned int nr)
-{
- BUG_ON(nr > 28);
- return 1 << nr;
-}
-
-extern void geode_gpio_set(u32, unsigned int);
-extern void geode_gpio_clear(u32, unsigned int);
-extern int geode_gpio_isset(u32, unsigned int);
-extern void geode_gpio_setup_event(unsigned int, int, int);
-extern void geode_gpio_set_irq(unsigned int, unsigned int);
-
-static inline void geode_gpio_event_irq(unsigned int gpio, int pair)
-{
- geode_gpio_setup_event(gpio, pair, 0);
-}
-
-static inline void geode_gpio_event_pme(unsigned int gpio, int pair)
-{
- geode_gpio_setup_event(gpio, pair, 1);
-}
-
-/* Specific geode tests */
+#include <linux/cs5535.h>
static inline int is_geode_gx(void)
{
@@ -186,68 +33,4 @@ static inline int is_geode(void)
return (is_geode_gx() || is_geode_lx());
}
-#ifdef CONFIG_MGEODE_LX
-extern int geode_has_vsa2(void);
-#else
-static inline int geode_has_vsa2(void)
-{
- return 0;
-}
-#endif
-
-/* MFGPTs */
-
-#define MFGPT_MAX_TIMERS 8
-#define MFGPT_TIMER_ANY (-1)
-
-#define MFGPT_DOMAIN_WORKING 1
-#define MFGPT_DOMAIN_STANDBY 2
-#define MFGPT_DOMAIN_ANY (MFGPT_DOMAIN_WORKING | MFGPT_DOMAIN_STANDBY)
-
-#define MFGPT_CMP1 0
-#define MFGPT_CMP2 1
-
-#define MFGPT_EVENT_IRQ 0
-#define MFGPT_EVENT_NMI 1
-#define MFGPT_EVENT_RESET 3
-
-#define MFGPT_REG_CMP1 0
-#define MFGPT_REG_CMP2 2
-#define MFGPT_REG_COUNTER 4
-#define MFGPT_REG_SETUP 6
-
-#define MFGPT_SETUP_CNTEN (1 << 15)
-#define MFGPT_SETUP_CMP2 (1 << 14)
-#define MFGPT_SETUP_CMP1 (1 << 13)
-#define MFGPT_SETUP_SETUP (1 << 12)
-#define MFGPT_SETUP_STOPEN (1 << 11)
-#define MFGPT_SETUP_EXTEN (1 << 10)
-#define MFGPT_SETUP_REVEN (1 << 5)
-#define MFGPT_SETUP_CLKSEL (1 << 4)
-
-static inline void geode_mfgpt_write(int timer, u16 reg, u16 value)
-{
- u32 base = geode_get_dev_base(GEODE_DEV_MFGPT);
- outw(value, base + reg + (timer * 8));
-}
-
-static inline u16 geode_mfgpt_read(int timer, u16 reg)
-{
- u32 base = geode_get_dev_base(GEODE_DEV_MFGPT);
- return inw(base + reg + (timer * 8));
-}
-
-extern int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable);
-extern int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable);
-extern int geode_mfgpt_alloc_timer(int timer, int domain);
-
-#define geode_mfgpt_setup_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 1)
-#define geode_mfgpt_release_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 0)
-
-#ifdef CONFIG_GEODE_MFGPT_TIMER
-extern int __init mfgpt_timer_setup(void);
-#else
-static inline int mfgpt_timer_setup(void) { return 0; }
-#endif
-
#endif /* _ASM_X86_GEODE_H */
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 108eb6fd1ae7..0f8576427cfe 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -12,7 +12,7 @@ typedef struct {
unsigned int apic_timer_irqs; /* arch dependent */
unsigned int irq_spurious_count;
#endif
- unsigned int generic_irqs; /* arch dependent */
+ unsigned int x86_platform_ipis; /* arch dependent */
unsigned int apic_perf_irqs;
unsigned int apic_pending_irqs;
#ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 1c22cb05ad6a..5d89fd2a3690 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -65,11 +65,12 @@
/* hpet memory map physical address */
extern unsigned long hpet_address;
extern unsigned long force_hpet_address;
+extern u8 hpet_blockid;
extern int hpet_force_user;
extern int is_hpet_enabled(void);
extern int hpet_enable(void);
extern void hpet_disable(void);
-extern unsigned long hpet_readl(unsigned long a);
+extern unsigned int hpet_readl(unsigned int a);
extern void force_hpet_resume(void);
extern void hpet_msi_unmask(unsigned int irq);
@@ -78,9 +79,9 @@ extern void hpet_msi_write(unsigned int irq, struct msi_msg *msg);
extern void hpet_msi_read(unsigned int irq, struct msi_msg *msg);
#ifdef CONFIG_PCI_MSI
-extern int arch_setup_hpet_msi(unsigned int irq);
+extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id);
#else
-static inline int arch_setup_hpet_msi(unsigned int irq)
+static inline int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
{
return -EINVAL;
}
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 6e124269fd4b..eeac829a0f44 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -27,7 +27,7 @@
/* Interrupt handlers registered during init_IRQ */
extern void apic_timer_interrupt(void);
-extern void generic_interrupt(void);
+extern void x86_platform_ipi(void);
extern void error_interrupt(void);
extern void perf_pending_interrupt(void);
@@ -103,7 +103,8 @@ extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
extern void send_cleanup_vector(struct irq_cfg *);
struct irq_desc;
-extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *);
+extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *,
+ unsigned int *dest_id);
extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr);
extern void setup_ioapic_dest(void);
@@ -119,7 +120,7 @@ extern void eisa_set_level_irq(unsigned int irq);
/* SMP */
extern void smp_apic_timer_interrupt(struct pt_regs *);
extern void smp_spurious_interrupt(struct pt_regs *);
-extern void smp_generic_interrupt(struct pt_regs *);
+extern void smp_x86_platform_ipi(struct pt_regs *);
extern void smp_error_interrupt(struct pt_regs *);
#ifdef CONFIG_X86_IO_APIC
extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 0b20bbb758f2..ebfb8a9e11f7 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -10,6 +10,8 @@
#ifndef _ASM_X86_I387_H
#define _ASM_X86_I387_H
+#ifndef __ASSEMBLY__
+
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/regset.h>
@@ -411,4 +413,9 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
}
}
+#endif /* __ASSEMBLY__ */
+
+#define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5
+#define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5
+
#endif /* _ASM_X86_I387_H */
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h
new file mode 100644
index 000000000000..14cf526091f9
--- /dev/null
+++ b/arch/x86/include/asm/inst.h
@@ -0,0 +1,150 @@
+/*
+ * Generate .byte code for some instructions not supported by old
+ * binutils.
+ */
+#ifndef X86_ASM_INST_H
+#define X86_ASM_INST_H
+
+#ifdef __ASSEMBLY__
+
+ .macro XMM_NUM opd xmm
+ .ifc \xmm,%xmm0
+ \opd = 0
+ .endif
+ .ifc \xmm,%xmm1
+ \opd = 1
+ .endif
+ .ifc \xmm,%xmm2
+ \opd = 2
+ .endif
+ .ifc \xmm,%xmm3
+ \opd = 3
+ .endif
+ .ifc \xmm,%xmm4
+ \opd = 4
+ .endif
+ .ifc \xmm,%xmm5
+ \opd = 5
+ .endif
+ .ifc \xmm,%xmm6
+ \opd = 6
+ .endif
+ .ifc \xmm,%xmm7
+ \opd = 7
+ .endif
+ .ifc \xmm,%xmm8
+ \opd = 8
+ .endif
+ .ifc \xmm,%xmm9
+ \opd = 9
+ .endif
+ .ifc \xmm,%xmm10
+ \opd = 10
+ .endif
+ .ifc \xmm,%xmm11
+ \opd = 11
+ .endif
+ .ifc \xmm,%xmm12
+ \opd = 12
+ .endif
+ .ifc \xmm,%xmm13
+ \opd = 13
+ .endif
+ .ifc \xmm,%xmm14
+ \opd = 14
+ .endif
+ .ifc \xmm,%xmm15
+ \opd = 15
+ .endif
+ .endm
+
+ .macro PFX_OPD_SIZE
+ .byte 0x66
+ .endm
+
+ .macro PFX_REX opd1 opd2
+ .if (\opd1 | \opd2) & 8
+ .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1)
+ .endif
+ .endm
+
+ .macro MODRM mod opd1 opd2
+ .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3)
+ .endm
+
+ .macro PSHUFB_XMM xmm1 xmm2
+ XMM_NUM pshufb_opd1 \xmm1
+ XMM_NUM pshufb_opd2 \xmm2
+ PFX_OPD_SIZE
+ PFX_REX pshufb_opd1 pshufb_opd2
+ .byte 0x0f, 0x38, 0x00
+ MODRM 0xc0 pshufb_opd1 pshufb_opd2
+ .endm
+
+ .macro PCLMULQDQ imm8 xmm1 xmm2
+ XMM_NUM clmul_opd1 \xmm1
+ XMM_NUM clmul_opd2 \xmm2
+ PFX_OPD_SIZE
+ PFX_REX clmul_opd1 clmul_opd2
+ .byte 0x0f, 0x3a, 0x44
+ MODRM 0xc0 clmul_opd1 clmul_opd2
+ .byte \imm8
+ .endm
+
+ .macro AESKEYGENASSIST rcon xmm1 xmm2
+ XMM_NUM aeskeygen_opd1 \xmm1
+ XMM_NUM aeskeygen_opd2 \xmm2
+ PFX_OPD_SIZE
+ PFX_REX aeskeygen_opd1 aeskeygen_opd2
+ .byte 0x0f, 0x3a, 0xdf
+ MODRM 0xc0 aeskeygen_opd1 aeskeygen_opd2
+ .byte \rcon
+ .endm
+
+ .macro AESIMC xmm1 xmm2
+ XMM_NUM aesimc_opd1 \xmm1
+ XMM_NUM aesimc_opd2 \xmm2
+ PFX_OPD_SIZE
+ PFX_REX aesimc_opd1 aesimc_opd2
+ .byte 0x0f, 0x38, 0xdb
+ MODRM 0xc0 aesimc_opd1 aesimc_opd2
+ .endm
+
+ .macro AESENC xmm1 xmm2
+ XMM_NUM aesenc_opd1 \xmm1
+ XMM_NUM aesenc_opd2 \xmm2
+ PFX_OPD_SIZE
+ PFX_REX aesenc_opd1 aesenc_opd2
+ .byte 0x0f, 0x38, 0xdc
+ MODRM 0xc0 aesenc_opd1 aesenc_opd2
+ .endm
+
+ .macro AESENCLAST xmm1 xmm2
+ XMM_NUM aesenclast_opd1 \xmm1
+ XMM_NUM aesenclast_opd2 \xmm2
+ PFX_OPD_SIZE
+ PFX_REX aesenclast_opd1 aesenclast_opd2
+ .byte 0x0f, 0x38, 0xdd
+ MODRM 0xc0 aesenclast_opd1 aesenclast_opd2
+ .endm
+
+ .macro AESDEC xmm1 xmm2
+ XMM_NUM aesdec_opd1 \xmm1
+ XMM_NUM aesdec_opd2 \xmm2
+ PFX_OPD_SIZE
+ PFX_REX aesdec_opd1 aesdec_opd2
+ .byte 0x0f, 0x38, 0xde
+ MODRM 0xc0 aesdec_opd1 aesdec_opd2
+ .endm
+
+ .macro AESDECLAST xmm1 xmm2
+ XMM_NUM aesdeclast_opd1 \xmm1
+ XMM_NUM aesdeclast_opd2 \xmm2
+ PFX_OPD_SIZE
+ PFX_REX aesdeclast_opd1 aesdeclast_opd2
+ .byte 0x0f, 0x38, 0xdf
+ MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2
+ .endm
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index ffd700ff5dcb..5458380b6ef8 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -37,7 +37,7 @@ extern void fixup_irqs(void);
extern void irq_force_complete_move(int);
#endif
-extern void (*generic_interrupt_extension)(void);
+extern void (*x86_platform_ipi_callback)(void);
extern void native_init_IRQ(void);
extern bool handle_irq(unsigned irq, struct pt_regs *regs);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 5b21f0ec3df2..4611f085cd43 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -106,14 +106,14 @@
/*
* Generic system vector for platform specific use
*/
-#define GENERIC_INTERRUPT_VECTOR 0xed
+#define X86_PLATFORM_IPI_VECTOR 0xed
/*
* Performance monitoring pending work vector:
*/
#define LOCAL_PENDING_VECTOR 0xec
-#define UV_BAU_MESSAGE 0xec
+#define UV_BAU_MESSAGE 0xea
/*
* Self IPI vector for machine checks
diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h
index c2d1f3b58e5f..f70e60071fe8 100644
--- a/arch/x86/include/asm/k8.h
+++ b/arch/x86/include/asm/k8.h
@@ -4,13 +4,16 @@
#include <linux/pci.h>
extern struct pci_device_id k8_nb_ids[];
+struct bootnode;
extern int early_is_k8_nb(u32 value);
extern struct pci_dev **k8_northbridges;
extern int num_k8_northbridges;
extern int cache_k8_northbridges(void);
extern void k8_flush_garts(void);
-extern int k8_scan_nodes(unsigned long start, unsigned long end);
+extern int k8_get_nodes(struct bootnode *nodes);
+extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn);
+extern int k8_scan_nodes(void);
#ifdef CONFIG_K8_NB
static inline struct pci_dev *node_to_k8_nb_misc(int node)
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 4a5fe914dc59..950df434763f 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -19,6 +19,8 @@
#define __KVM_HAVE_MSIX
#define __KVM_HAVE_MCE
#define __KVM_HAVE_PIT_STATE2
+#define __KVM_HAVE_XEN_HVM
+#define __KVM_HAVE_VCPU_EVENTS
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
@@ -79,6 +81,7 @@ struct kvm_ioapic_state {
#define KVM_IRQCHIP_PIC_MASTER 0
#define KVM_IRQCHIP_PIC_SLAVE 1
#define KVM_IRQCHIP_IOAPIC 2
+#define KVM_NR_IRQCHIPS 3
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
@@ -250,4 +253,31 @@ struct kvm_reinject_control {
__u8 pit_reinject;
__u8 reserved[31];
};
+
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+ struct {
+ __u8 injected;
+ __u8 nr;
+ __u8 has_error_code;
+ __u8 pad;
+ __u32 error_code;
+ } exception;
+ struct {
+ __u8 injected;
+ __u8 nr;
+ __u8 soft;
+ __u8 pad;
+ } interrupt;
+ struct {
+ __u8 injected;
+ __u8 pending;
+ __u8 masked;
+ __u8 pad;
+ } nmi;
+ __u32 sipi_vector;
+ __u32 flags;
+ __u32 reserved[10];
+};
+
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index b7ed2c423116..7c18e1230f54 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -129,7 +129,7 @@ struct decode_cache {
u8 seg_override;
unsigned int d;
unsigned long regs[NR_VCPU_REGS];
- unsigned long eip;
+ unsigned long eip, eip_orig;
/* modrm */
u8 modrm;
u8 modrm_mod;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d83892226f73..4f865e8b8540 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -354,7 +354,6 @@ struct kvm_vcpu_arch {
unsigned int time_offset;
struct page *time_page;
- bool singlestep; /* guest is single stepped by KVM */
bool nmi_pending;
bool nmi_injected;
@@ -371,6 +370,10 @@ struct kvm_vcpu_arch {
u64 mcg_status;
u64 mcg_ctl;
u64 *mce_banks;
+
+ /* used for guest single stepping over the given code position */
+ u16 singlestep_cs;
+ unsigned long singlestep_rip;
};
struct kvm_mem_alias {
@@ -397,7 +400,6 @@ struct kvm_arch{
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
- struct hlist_head irq_ack_notifier_list;
int vapics_in_nmi_mode;
unsigned int tss_addr;
@@ -410,8 +412,10 @@ struct kvm_arch{
gpa_t ept_identity_map_addr;
unsigned long irq_sources_bitmap;
- unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
u64 vm_init_tsc;
+ s64 kvmclock_offset;
+
+ struct kvm_xen_hvm_config xen_hvm_config;
};
struct kvm_vm_stat {
@@ -461,7 +465,7 @@ struct descriptor_table {
struct kvm_x86_ops {
int (*cpu_has_kvm_support)(void); /* __init */
int (*disabled_by_bios)(void); /* __init */
- void (*hardware_enable)(void *dummy); /* __init */
+ int (*hardware_enable)(void *dummy);
void (*hardware_disable)(void *dummy);
void (*check_processor_compatibility)(void *rtn);
int (*hardware_setup)(void); /* __init */
@@ -477,8 +481,8 @@ struct kvm_x86_ops {
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
- int (*set_guest_debug)(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug *dbg);
+ void (*set_guest_debug)(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -506,8 +510,8 @@ struct kvm_x86_ops {
void (*tlb_flush)(struct kvm_vcpu *vcpu);
- void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
- int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
+ void (*run)(struct kvm_vcpu *vcpu);
+ int (*handle_exit)(struct kvm_vcpu *vcpu);
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
@@ -519,6 +523,8 @@ struct kvm_x86_ops {
bool has_error_code, u32 error_code);
int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
int (*nmi_allowed)(struct kvm_vcpu *vcpu);
+ bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
+ void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
@@ -568,7 +574,7 @@ enum emulation_result {
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
#define EMULTYPE_SKIP (1 << 2)
-int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
+int emulate_instruction(struct kvm_vcpu *vcpu,
unsigned long cr2, u16 error_code, int emulation_type);
void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
@@ -585,9 +591,9 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
struct x86_emulate_ctxt;
-int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in,
int size, unsigned port);
-int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
int size, unsigned long count, int down,
gva_t address, int rep, unsigned port);
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
@@ -616,6 +622,9 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
+void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
+
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
@@ -802,4 +811,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
+void kvm_define_shared_msr(unsigned index, u32 msr);
+void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index ef51b501e22a..c24ca9a56458 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -12,6 +12,8 @@ struct device;
enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
struct microcode_ops {
+ void (*init)(struct device *device);
+ void (*fini)(void);
enum ucode_state (*request_microcode_user) (int cpu,
const void __user *buf, size_t size);
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index ede6998bd92c..91df7c51806c 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -47,7 +47,7 @@ static inline void resume_map_numa_kva(pgd_t *pgd) {}
/*
* generic node memory support, the following assumptions apply:
*
- * 1) memory comes in 64Mb contigious chunks which are either present or not
+ * 1) memory comes in 64Mb contiguous chunks which are either present or not
* 2) we will not have more than 64Gb in total
*
* for now assume that 64Gb is max amount of RAM for whole system
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 61d90b1331c3..d8bf23a88d05 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -71,12 +71,7 @@ static inline void early_get_smp_config(void)
static inline void find_smp_config(void)
{
- x86_init.mpparse.find_smp_config(1);
-}
-
-static inline void early_find_smp_config(void)
-{
- x86_init.mpparse.find_smp_config(0);
+ x86_init.mpparse.find_smp_config();
}
#ifdef CONFIG_X86_MPPARSE
@@ -89,7 +84,7 @@ extern void default_mpc_oem_bus_info(struct mpc_bus *m, char *str);
# else
# define default_mpc_oem_bus_info NULL
# endif
-extern void default_find_smp_config(unsigned int reserve);
+extern void default_find_smp_config(void);
extern void default_get_smp_config(unsigned int early);
#else
static inline void early_reserve_e820_mpc_new(void) { }
@@ -97,7 +92,7 @@ static inline void early_reserve_e820_mpc_new(void) { }
#define default_mpc_apic_id NULL
#define default_smp_read_mpc_oem NULL
#define default_mpc_oem_bus_info NULL
-#define default_find_smp_config x86_init_uint_noop
+#define default_find_smp_config x86_init_noop
#define default_get_smp_config x86_init_uint_noop
#endif
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4ffe09b2ad75..1cd58cdbc03f 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -12,6 +12,7 @@
#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */
#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */
#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */
+#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */
/* EFER bits: */
#define _EFER_SCE 0 /* SYSCALL/SYSRET */
@@ -123,6 +124,7 @@
#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff
#define FAM10H_MMIO_CONF_BASE_SHIFT 20
+#define MSR_FAM10H_NODE_ID 0xc001100c
/* K8 MSRs */
#define MSR_K8_TOP_MEM1 0xc001001a
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 5bef931f8b14..c5bc4c2d33f5 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -27,6 +27,18 @@ struct msr {
};
};
+struct msr_info {
+ u32 msr_no;
+ struct msr reg;
+ struct msr *msrs;
+ int err;
+};
+
+struct msr_regs_info {
+ u32 *regs;
+ int err;
+};
+
static inline unsigned long long native_read_tscp(unsigned int *aux)
{
unsigned long low, high;
@@ -240,9 +252,12 @@ do { \
#define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \
(u32)((val) >> 32))
-#define write_tsc(val1, val2) wrmsr(0x10, (val1), (val2))
+#define write_tsc(val1, val2) wrmsr(MSR_IA32_TSC, (val1), (val2))
+
+#define write_rdtscp_aux(val) wrmsr(MSR_TSC_AUX, (val), 0)
-#define write_rdtscp_aux(val) wrmsr(0xc0000103, (val), 0)
+struct msr *msrs_alloc(void);
+void msrs_free(struct msr *msrs);
#ifdef CONFIG_SMP
int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h
index 834a30295fab..3a57385d9fa7 100644
--- a/arch/x86/include/asm/olpc.h
+++ b/arch/x86/include/asm/olpc.h
@@ -120,7 +120,7 @@ extern int olpc_ec_mask_unset(uint8_t bits);
/* GPIO assignments */
-#define OLPC_GPIO_MIC_AC geode_gpio(1)
+#define OLPC_GPIO_MIC_AC 1
#define OLPC_GPIO_DCON_IRQ geode_gpio(7)
#define OLPC_GPIO_THRM_ALRM geode_gpio(10)
#define OLPC_GPIO_SMB_CLK geode_gpio(14)
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 6473f5ccff85..642fe34b36a2 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -49,7 +49,8 @@ extern unsigned long max_pfn_mapped;
extern unsigned long init_memory_mapping(unsigned long start,
unsigned long end);
-extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
+extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn,
+ int acpi, int k8);
extern void free_initmem(void);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index efb38994859c..dd59a85a918f 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -731,34 +731,34 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
-static inline int __raw_spin_is_locked(struct raw_spinlock *lock)
+static inline int arch_spin_is_locked(struct arch_spinlock *lock)
{
return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
}
-static inline int __raw_spin_is_contended(struct raw_spinlock *lock)
+static inline int arch_spin_is_contended(struct arch_spinlock *lock)
{
return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
}
-#define __raw_spin_is_contended __raw_spin_is_contended
+#define arch_spin_is_contended arch_spin_is_contended
-static __always_inline void __raw_spin_lock(struct raw_spinlock *lock)
+static __always_inline void arch_spin_lock(struct arch_spinlock *lock)
{
PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
}
-static __always_inline void __raw_spin_lock_flags(struct raw_spinlock *lock,
+static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock,
unsigned long flags)
{
PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
}
-static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock)
+static __always_inline int arch_spin_trylock(struct arch_spinlock *lock)
{
return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
}
-static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
+static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
{
PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
}
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 9357473c8da0..b1e70d51e40c 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -318,14 +318,14 @@ struct pv_mmu_ops {
phys_addr_t phys, pgprot_t flags);
};
-struct raw_spinlock;
+struct arch_spinlock;
struct pv_lock_ops {
- int (*spin_is_locked)(struct raw_spinlock *lock);
- int (*spin_is_contended)(struct raw_spinlock *lock);
- void (*spin_lock)(struct raw_spinlock *lock);
- void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags);
- int (*spin_trylock)(struct raw_spinlock *lock);
- void (*spin_unlock)(struct raw_spinlock *lock);
+ int (*spin_is_locked)(struct arch_spinlock *lock);
+ int (*spin_is_contended)(struct arch_spinlock *lock);
+ void (*spin_lock)(struct arch_spinlock *lock);
+ void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags);
+ int (*spin_trylock)(struct arch_spinlock *lock);
+ void (*spin_unlock)(struct arch_spinlock *lock);
};
/* This contains all the paravirt structures: we get a convenient
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index b399988eee3a..b4bf9a942ed0 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -118,11 +118,27 @@ extern int __init pcibios_init(void);
/* pci-mmconfig.c */
+/* "PCI MMCONFIG %04x [bus %02x-%02x]" */
+#define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2)
+
+struct pci_mmcfg_region {
+ struct list_head list;
+ struct resource res;
+ u64 address;
+ char __iomem *virt;
+ u16 segment;
+ u8 start_bus;
+ u8 end_bus;
+ char name[PCI_MMCFG_RESOURCE_NAME_LEN];
+};
+
extern int __init pci_mmcfg_arch_init(void);
extern void __init pci_mmcfg_arch_free(void);
+extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus);
+
+extern struct list_head pci_mmcfg_list;
-extern struct acpi_mcfg_allocation *pci_mmcfg_config;
-extern int pci_mmcfg_config_num;
+#define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20)
/*
* AMD Fam10h CPUs are buggy, and cannot access MMIO config space
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index b65a36defeb7..0c44196b78ac 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -74,31 +74,31 @@ extern void __bad_percpu_size(void);
#define percpu_to_op(op, var, val) \
do { \
- typedef typeof(var) T__; \
+ typedef typeof(var) pto_T__; \
if (0) { \
- T__ tmp__; \
- tmp__ = (val); \
+ pto_T__ pto_tmp__; \
+ pto_tmp__ = (val); \
} \
switch (sizeof(var)) { \
case 1: \
asm(op "b %1,"__percpu_arg(0) \
: "+m" (var) \
- : "qi" ((T__)(val))); \
+ : "qi" ((pto_T__)(val))); \
break; \
case 2: \
asm(op "w %1,"__percpu_arg(0) \
: "+m" (var) \
- : "ri" ((T__)(val))); \
+ : "ri" ((pto_T__)(val))); \
break; \
case 4: \
asm(op "l %1,"__percpu_arg(0) \
: "+m" (var) \
- : "ri" ((T__)(val))); \
+ : "ri" ((pto_T__)(val))); \
break; \
case 8: \
asm(op "q %1,"__percpu_arg(0) \
: "+m" (var) \
- : "re" ((T__)(val))); \
+ : "re" ((pto_T__)(val))); \
break; \
default: __bad_percpu_size(); \
} \
@@ -106,31 +106,31 @@ do { \
#define percpu_from_op(op, var, constraint) \
({ \
- typeof(var) ret__; \
+ typeof(var) pfo_ret__; \
switch (sizeof(var)) { \
case 1: \
asm(op "b "__percpu_arg(1)",%0" \
- : "=q" (ret__) \
+ : "=q" (pfo_ret__) \
: constraint); \
break; \
case 2: \
asm(op "w "__percpu_arg(1)",%0" \
- : "=r" (ret__) \
+ : "=r" (pfo_ret__) \
: constraint); \
break; \
case 4: \
asm(op "l "__percpu_arg(1)",%0" \
- : "=r" (ret__) \
+ : "=r" (pfo_ret__) \
: constraint); \
break; \
case 8: \
asm(op "q "__percpu_arg(1)",%0" \
- : "=r" (ret__) \
+ : "=r" (pfo_ret__) \
: constraint); \
break; \
default: __bad_percpu_size(); \
} \
- ret__; \
+ pfo_ret__; \
})
/*
@@ -153,6 +153,84 @@ do { \
#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val)
#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val)
+#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define __this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+
+#define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
+#define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
+#define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
+#define __this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val)
+#define __this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val)
+#define __this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val)
+#define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
+#define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
+#define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
+#define __this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
+#define __this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
+#define __this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
+#define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
+#define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
+#define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
+
+#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
+#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
+#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
+#define this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val)
+#define this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val)
+#define this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val)
+#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
+#define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
+#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
+#define this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
+#define this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
+#define this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
+#define this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
+#define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
+#define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
+
+#define irqsafe_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val)
+#define irqsafe_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val)
+#define irqsafe_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val)
+#define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
+#define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
+#define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
+#define irqsafe_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
+#define irqsafe_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
+#define irqsafe_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
+#define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
+#define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
+#define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
+
+/*
+ * Per cpu atomic 64 bit operations are only available under 64 bit.
+ * 32 bit must fall back to generic operations.
+ */
+#ifdef CONFIG_X86_64
+#define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
+#define __this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val)
+#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
+#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
+#define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
+
+#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
+#define this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val)
+#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
+#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
+#define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
+
+#define irqsafe_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val)
+#define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
+#define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
+#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
+
+#endif
+
/* This is not atomic against other CPUs -- CPU preemption needs to be off */
#define x86_test_and_clear_bit_percpu(bit, var) \
({ \
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index af6fd360ab35..a34c785c5a63 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -16,6 +16,8 @@
#ifndef __ASSEMBLY__
+#include <asm/x86_init.h>
+
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
@@ -270,9 +272,9 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
unsigned long new_flags)
{
/*
- * PAT type is always WB for ISA. So no need to check.
+ * PAT type is always WB for untracked ranges, so no need to check.
*/
- if (is_ISA_range(paddr, paddr + size - 1))
+ if (x86_platform.is_untracked_pat_range(paddr, paddr + size))
return 1;
/*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 6f8ec1c37e0a..fc801bab1b3b 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -181,7 +181,7 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
/* ecx is often an input as well as an output. */
- asm("cpuid"
+ asm volatile("cpuid"
: "=a" (*eax),
"=b" (*ebx),
"=c" (*ecx),
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 621f56d73121..4009f6534f52 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -5,18 +5,19 @@
/* misc architecture specific prototypes */
-extern void early_idt_handler(void);
+void early_idt_handler(void);
-extern void system_call(void);
-extern void syscall_init(void);
+void system_call(void);
+void syscall_init(void);
-extern void ia32_syscall(void);
-extern void ia32_cstar_target(void);
-extern void ia32_sysenter_target(void);
+void ia32_syscall(void);
+void ia32_cstar_target(void);
+void ia32_sysenter_target(void);
-extern void syscall32_cpu_init(void);
+void syscall32_cpu_init(void);
-extern void check_efer(void);
+void x86_configure_nx(void);
+void x86_report_nx(void);
extern int reboot_force;
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 3d11fd0f44c5..9d369f680321 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -292,6 +292,8 @@ extern void user_enable_block_step(struct task_struct *);
#define arch_has_block_step() (boot_cpu_data.x86 >= 6)
#endif
+#define ARCH_HAS_USER_SINGLE_STEP_INFO
+
struct user_desc;
extern int do_get_thread_area(struct task_struct *p, int idx,
struct user_desc __user *info);
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 1b7ee5d673c2..0a5242428659 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -2,7 +2,13 @@
#define _ASM_X86_SECTIONS_H
#include <asm-generic/sections.h>
+#include <asm/uaccess.h>
extern char __brk_base[], __brk_limit[];
+extern struct exception_table_entry __stop___ex_table[];
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+extern char __end_rodata_hpage_align[];
+#endif
#endif /* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index 72e5a4491661..04459d25e66e 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -124,7 +124,7 @@ struct sigcontext {
* fpstate is really (struct _fpstate *) or (struct _xstate *)
* depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
* bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
- * of extended memory layout. See comments at the defintion of
+ * of extended memory layout. See comments at the definition of
* (struct _fpx_sw_bytes)
*/
void __user *fpstate; /* zero when no FPU/extended context */
@@ -219,7 +219,7 @@ struct sigcontext {
* fpstate is really (struct _fpstate *) or (struct _xstate *)
* depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
* bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
- * of extended memory layout. See comments at the defintion of
+ * of extended memory layout. See comments at the definition of
* (struct _fpx_sw_bytes)
*/
void __user *fpstate; /* zero when no FPU/extended context */
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 4e77853321db..3089f70c0c52 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -58,7 +58,7 @@
#if (NR_CPUS < 256)
#define TICKET_SHIFT 8
-static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
{
short inc = 0x0100;
@@ -77,7 +77,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
: "memory", "cc");
}
-static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
{
int tmp, new;
@@ -96,7 +96,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
return tmp;
}
-static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
{
asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
: "+m" (lock->slock)
@@ -106,7 +106,7 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
#else
#define TICKET_SHIFT 16
-static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
{
int inc = 0x00010000;
int tmp;
@@ -127,7 +127,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
: "memory", "cc");
}
-static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
{
int tmp;
int new;
@@ -149,7 +149,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
return tmp;
}
-static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
{
asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
: "+m" (lock->slock)
@@ -158,14 +158,14 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
}
#endif
-static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
{
int tmp = ACCESS_ONCE(lock->slock);
return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
}
-static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
{
int tmp = ACCESS_ONCE(lock->slock);
@@ -174,43 +174,43 @@ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
#ifndef CONFIG_PARAVIRT_SPINLOCKS
-static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
return __ticket_spin_is_locked(lock);
}
-static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
return __ticket_spin_is_contended(lock);
}
-#define __raw_spin_is_contended __raw_spin_is_contended
+#define arch_spin_is_contended arch_spin_is_contended
-static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
+static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
{
__ticket_spin_lock(lock);
}
-static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
{
return __ticket_spin_trylock(lock);
}
-static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
{
__ticket_spin_unlock(lock);
}
-static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
+static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
unsigned long flags)
{
- __raw_spin_lock(lock);
+ arch_spin_lock(lock);
}
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
-static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
- while (__raw_spin_is_locked(lock))
+ while (arch_spin_is_locked(lock))
cpu_relax();
}
@@ -232,7 +232,7 @@ static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
* read_can_lock - would read_trylock() succeed?
* @lock: the rwlock in question.
*/
-static inline int __raw_read_can_lock(raw_rwlock_t *lock)
+static inline int arch_read_can_lock(arch_rwlock_t *lock)
{
return (int)(lock)->lock > 0;
}
@@ -241,12 +241,12 @@ static inline int __raw_read_can_lock(raw_rwlock_t *lock)
* write_can_lock - would write_trylock() succeed?
* @lock: the rwlock in question.
*/
-static inline int __raw_write_can_lock(raw_rwlock_t *lock)
+static inline int arch_write_can_lock(arch_rwlock_t *lock)
{
return (lock)->lock == RW_LOCK_BIAS;
}
-static inline void __raw_read_lock(raw_rwlock_t *rw)
+static inline void arch_read_lock(arch_rwlock_t *rw)
{
asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
"jns 1f\n"
@@ -255,7 +255,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw)
::LOCK_PTR_REG (rw) : "memory");
}
-static inline void __raw_write_lock(raw_rwlock_t *rw)
+static inline void arch_write_lock(arch_rwlock_t *rw)
{
asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t"
"jz 1f\n"
@@ -264,7 +264,7 @@ static inline void __raw_write_lock(raw_rwlock_t *rw)
::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory");
}
-static inline int __raw_read_trylock(raw_rwlock_t *lock)
+static inline int arch_read_trylock(arch_rwlock_t *lock)
{
atomic_t *count = (atomic_t *)lock;
@@ -274,7 +274,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *lock)
return 0;
}
-static inline int __raw_write_trylock(raw_rwlock_t *lock)
+static inline int arch_write_trylock(arch_rwlock_t *lock)
{
atomic_t *count = (atomic_t *)lock;
@@ -284,23 +284,23 @@ static inline int __raw_write_trylock(raw_rwlock_t *lock)
return 0;
}
-static inline void __raw_read_unlock(raw_rwlock_t *rw)
+static inline void arch_read_unlock(arch_rwlock_t *rw)
{
asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory");
}
-static inline void __raw_write_unlock(raw_rwlock_t *rw)
+static inline void arch_write_unlock(arch_rwlock_t *rw)
{
asm volatile(LOCK_PREFIX "addl %1, %0"
: "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
}
-#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
-#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-#define _raw_spin_relax(lock) cpu_relax()
-#define _raw_read_relax(lock) cpu_relax()
-#define _raw_write_relax(lock) cpu_relax()
+#define arch_spin_relax(lock) cpu_relax()
+#define arch_read_relax(lock) cpu_relax()
+#define arch_write_relax(lock) cpu_relax()
/* The {read|write|spin}_lock() on x86 are full memory barriers. */
static inline void smp_mb__after_lock(void) { }
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 845f81c87091..dcb48b2edc11 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -5,16 +5,16 @@
# error "please don't include this file directly"
#endif
-typedef struct raw_spinlock {
+typedef struct arch_spinlock {
unsigned int slock;
-} raw_spinlock_t;
+} arch_spinlock_t;
-#define __RAW_SPIN_LOCK_UNLOCKED { 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
typedef struct {
unsigned int lock;
-} raw_rwlock_t;
+} arch_rwlock_t;
-#define __RAW_RW_LOCK_UNLOCKED { RW_LOCK_BIAS }
+#define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS }
#endif /* _ASM_X86_SPINLOCK_TYPES_H */
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index cf86a5e73815..35e89122a42f 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -5,6 +5,29 @@ extern int kstack_depth_to_print;
int x86_is_stack_id(int id, char *name);
+struct thread_info;
+struct stacktrace_ops;
+
+typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo,
+ unsigned long *stack,
+ unsigned long bp,
+ const struct stacktrace_ops *ops,
+ void *data,
+ unsigned long *end,
+ int *graph);
+
+extern unsigned long
+print_context_stack(struct thread_info *tinfo,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data,
+ unsigned long *end, int *graph);
+
+extern unsigned long
+print_context_stack_bp(struct thread_info *tinfo,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data,
+ unsigned long *end, int *graph);
+
/* Generic stack tracer with callbacks */
struct stacktrace_ops {
@@ -14,6 +37,7 @@ struct stacktrace_ops {
void (*address)(void *data, unsigned long address, int reliable);
/* On negative return stop dumping */
int (*stack)(void *data, char *name);
+ walk_stack_t walk_stack;
};
void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 85574b7c1bc1..1fecb7e61130 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -57,7 +57,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u16 intercept_dr_write;
u32 intercept_exceptions;
u64 intercept;
- u8 reserved_1[44];
+ u8 reserved_1[42];
+ u16 pause_filter_count;
u64 iopm_base_pa;
u64 msrpm_base_pa;
u64 tsc_offset;
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 87ffcb12a1b8..8085277e1b8b 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -5,13 +5,17 @@
#ifdef CONFIG_SWIOTLB
extern int swiotlb;
-extern int pci_swiotlb_init(void);
+extern int __init pci_swiotlb_detect(void);
+extern void __init pci_swiotlb_init(void);
#else
#define swiotlb 0
-static inline int pci_swiotlb_init(void)
+static inline int pci_swiotlb_detect(void)
{
return 0;
}
+static inline void pci_swiotlb_init(void)
+{
+}
#endif
static inline void dma_mark_clean(void *addr, size_t size) {}
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index 72a6dcd1299b..d5f69045c100 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -30,7 +30,6 @@ struct mmap_arg_struct;
asmlinkage long sys32_mmap(struct mmap_arg_struct __user *);
asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long);
-asmlinkage long sys32_pipe(int __user *);
struct sigaction32;
struct old_sigaction32;
asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *,
@@ -51,20 +50,12 @@ asmlinkage long sys32_sched_rr_get_interval(compat_pid_t,
asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t);
asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *);
-#ifdef CONFIG_SYSCTL_SYSCALL
-struct sysctl_ia32;
-asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *);
-#endif
-
asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32);
asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32);
asmlinkage long sys32_personality(unsigned long);
asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
-asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long,
- unsigned long, unsigned long, unsigned long);
-
struct oldold_utsname;
struct old_utsname;
asmlinkage long sys32_olduname(struct oldold_utsname __user *);
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 372b76edd63f..8868b9420b0e 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -18,16 +18,24 @@
/* Common in X86_32 and X86_64 */
/* kernel/ioport.c */
asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
+long sys_iopl(unsigned int, struct pt_regs *);
/* kernel/process.c */
int sys_fork(struct pt_regs *);
int sys_vfork(struct pt_regs *);
+long sys_execve(char __user *, char __user * __user *,
+ char __user * __user *, struct pt_regs *);
+long sys_clone(unsigned long, unsigned long, void __user *,
+ void __user *, struct pt_regs *);
/* kernel/ldt.c */
asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
/* kernel/signal.c */
long sys_rt_sigreturn(struct pt_regs *);
+long sys_sigaltstack(const stack_t __user *, stack_t __user *,
+ struct pt_regs *);
+
/* kernel/tls.c */
asmlinkage int sys_set_thread_area(struct user_desc __user *);
@@ -35,18 +43,11 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *);
/* X86_32 only */
#ifdef CONFIG_X86_32
-/* kernel/ioport.c */
-long sys_iopl(struct pt_regs *);
-
-/* kernel/process_32.c */
-int sys_clone(struct pt_regs *);
-int sys_execve(struct pt_regs *);
/* kernel/signal.c */
asmlinkage int sys_sigsuspend(int, int, old_sigset_t);
asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
struct old_sigaction __user *);
-int sys_sigaltstack(struct pt_regs *);
unsigned long sys_sigreturn(struct pt_regs *);
/* kernel/sys_i386_32.c */
@@ -55,8 +56,6 @@ struct sel_arg_struct;
struct oldold_utsname;
struct old_utsname;
-asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
- unsigned long, unsigned long, unsigned long);
asmlinkage int old_mmap(struct mmap_arg_struct __user *);
asmlinkage int old_select(struct sel_arg_struct __user *);
asmlinkage int sys_ipc(uint, int, int, int, void __user *, long);
@@ -64,28 +63,15 @@ asmlinkage int sys_uname(struct old_utsname __user *);
asmlinkage int sys_olduname(struct oldold_utsname __user *);
/* kernel/vm86_32.c */
-int sys_vm86old(struct pt_regs *);
-int sys_vm86(struct pt_regs *);
+int sys_vm86old(struct vm86_struct __user *, struct pt_regs *);
+int sys_vm86(unsigned long, unsigned long, struct pt_regs *);
#else /* CONFIG_X86_32 */
/* X86_64 only */
-/* kernel/ioport.c */
-asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
-
/* kernel/process_64.c */
-asmlinkage long sys_clone(unsigned long, unsigned long,
- void __user *, void __user *,
- struct pt_regs *);
-asmlinkage long sys_execve(char __user *, char __user * __user *,
- char __user * __user *,
- struct pt_regs *);
long sys_arch_prctl(int, unsigned long);
-/* kernel/signal.c */
-asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *,
- struct pt_regs *);
-
/* kernel/sys_x86_64.c */
struct new_utsname;
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 022a84386de8..ecb544e65382 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -23,6 +23,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
struct tss_struct;
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss);
+extern void show_regs_common(void);
#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index d27d0a2fec4c..375c917c37d2 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -83,6 +83,7 @@ struct thread_info {
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
+#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* 32bit process */
#define TIF_FORK 18 /* ret_from_fork */
@@ -107,6 +108,7 @@ struct thread_info {
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
+#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_FORK (1 << TIF_FORK)
@@ -142,13 +144,14 @@ struct thread_info {
/* Only used for 64 bit */
#define _TIF_DO_NOTIFY_MASK \
- (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME)
+ (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \
+ _TIF_USER_RETURN_NOTIFY)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC)
-#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
+#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
#define PREEMPT_ACTIVE 0x10000000
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 40e37b10c6c0..c5087d796587 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -35,11 +35,16 @@
# endif
#endif
-/* Node not present */
-#define NUMA_NO_NODE (-1)
+/*
+ * to preserve the visibility of NUMA_NO_NODE definition,
+ * moved to there from here. May be used independent of
+ * CONFIG_NUMA.
+ */
+#include <linux/numa.h>
#ifdef CONFIG_NUMA
#include <linux/cpumask.h>
+
#include <asm/mpspec.h>
#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
index 90f06c25221d..cb507bb05d79 100644
--- a/arch/x86/include/asm/trampoline.h
+++ b/arch/x86/include/asm/trampoline.h
@@ -16,7 +16,6 @@ extern unsigned long initial_code;
extern unsigned long initial_gs;
#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
-#define TRAMPOLINE_BASE 0x6000
extern unsigned long setup_trampoline(void);
extern void __init reserve_trampoline_memory(void);
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 6fb3c209a7e3..3baf379fa840 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -342,10 +342,11 @@
#define __NR_pwritev 334
#define __NR_rt_tgsigqueueinfo 335
#define __NR_perf_event_open 336
+#define __NR_recvmmsg 337
#ifdef __KERNEL__
-#define NR_syscalls 337
+#define NR_syscalls 338
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 8d3ad0adbc68..4843f7ba754a 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -661,6 +661,8 @@ __SYSCALL(__NR_pwritev, sys_pwritev)
__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
#define __NR_perf_event_open 298
__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
+#define __NR_recvmmsg 299
+__SYSCALL(__NR_recvmmsg, sys_recvmmsg)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 7ed17ff502b9..2751f3075d8b 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -76,15 +76,6 @@ union partition_info_u {
};
};
-union uv_watchlist_u {
- u64 val;
- struct {
- u64 blade : 16,
- size : 32,
- filler : 16;
- };
-};
-
enum uv_memprotect {
UV_MEMPROT_RESTRICT_ACCESS,
UV_MEMPROT_ALLOW_AMO,
@@ -100,7 +91,7 @@ extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *);
extern s64 uv_bios_freq_base(u64, u64 *);
-extern int uv_bios_mq_watchlist_alloc(int, unsigned long, unsigned int,
+extern int uv_bios_mq_watchlist_alloc(unsigned long, unsigned int,
unsigned long *);
extern int uv_bios_mq_watchlist_free(int, int);
extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect);
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 80e2984f521c..b414d2b401f6 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -55,7 +55,7 @@
#define DESC_STATUS_SOURCE_TIMEOUT 3
/*
- * source side threshholds at which message retries print a warning
+ * source side thresholds at which message retries print a warning
*/
#define SOURCE_TIMEOUT_LIMIT 20
#define DESTINATION_TIMEOUT_LIMIT 20
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index d1414af98559..811bfabc80b7 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -172,6 +172,8 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
#define UV_LOCAL_MMR_SIZE (64UL * 1024 * 1024)
#define UV_GLOBAL_MMR32_SIZE (64UL * 1024 * 1024)
+#define UV_GLOBAL_GRU_MMR_BASE 0x4000000
+
#define UV_GLOBAL_MMR32_PNODE_SHIFT 15
#define UV_GLOBAL_MMR64_PNODE_SHIFT 26
@@ -232,6 +234,26 @@ static inline unsigned long uv_gpa(void *v)
return uv_soc_phys_ram_to_gpa(__pa(v));
}
+/* Top two bits indicate the requested address is in MMR space. */
+static inline int
+uv_gpa_in_mmr_space(unsigned long gpa)
+{
+ return (gpa >> 62) == 0x3UL;
+}
+
+/* UV global physical address --> socket phys RAM */
+static inline unsigned long uv_gpa_to_soc_phys_ram(unsigned long gpa)
+{
+ unsigned long paddr = gpa & uv_hub_info->gpa_mask;
+ unsigned long remap_base = uv_hub_info->lowmem_remap_base;
+ unsigned long remap_top = uv_hub_info->lowmem_remap_top;
+
+ if (paddr >= remap_base && paddr < remap_base + remap_top)
+ paddr -= remap_base;
+ return paddr;
+}
+
+
/* gnode -> pnode */
static inline unsigned long uv_gpa_to_gnode(unsigned long gpa)
{
@@ -308,6 +330,15 @@ static inline unsigned long uv_read_global_mmr64(int pnode,
}
/*
+ * Global MMR space addresses when referenced by the GRU. (GRU does
+ * NOT use socket addressing).
+ */
+static inline unsigned long uv_global_gru_mmr_address(int pnode, unsigned long offset)
+{
+ return UV_GLOBAL_GRU_MMR_BASE | offset | (pnode << uv_hub_info->m_val);
+}
+
+/*
* Access hub local MMRs. Faster than using global space but only local MMRs
* are accessible.
*/
@@ -434,6 +465,14 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
}
}
+static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode)
+{
+ return (1UL << UVH_IPI_INT_SEND_SHFT) |
+ ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
+ (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
+ (vector << UVH_IPI_INT_VECTOR_SHFT);
+}
+
static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
{
unsigned long val;
@@ -442,10 +481,7 @@ static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
if (vector == NMI_VECTOR)
dmode = dest_NMI;
- val = (1UL << UVH_IPI_INT_SEND_SHFT) |
- ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
- (dmode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
- (vector << UVH_IPI_INT_VECTOR_SHFT);
+ val = uv_hub_ipi_value(apicid, vector, dmode);
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
}
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 272514c2d456..2b4945419a84 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -56,6 +56,7 @@
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define PIN_BASED_EXT_INTR_MASK 0x00000001
@@ -144,6 +145,8 @@ enum vmcs_field {
VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
TPR_THRESHOLD = 0x0000401c,
SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
+ PLE_GAP = 0x00004020,
+ PLE_WINDOW = 0x00004022,
VM_INSTRUCTION_ERROR = 0x00004400,
VM_EXIT_REASON = 0x00004402,
VM_EXIT_INTR_INFO = 0x00004404,
@@ -248,6 +251,7 @@ enum vmcs_field {
#define EXIT_REASON_MSR_READ 31
#define EXIT_REASON_MSR_WRITE 32
#define EXIT_REASON_MWAIT_INSTRUCTION 36
+#define EXIT_REASON_PAUSE_INSTRUCTION 40
#define EXIT_REASON_MCE_DURING_VMENTRY 41
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index d8e71459f025..ea0e8ea15e15 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -26,7 +26,7 @@ struct x86_init_mpparse {
void (*smp_read_mpc_oem)(struct mpc_table *mpc);
void (*mpc_oem_pci_bus)(struct mpc_bus *m);
void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name);
- void (*find_smp_config)(unsigned int reserve);
+ void (*find_smp_config)(void);
void (*get_smp_config)(unsigned int early);
};
@@ -125,12 +125,14 @@ struct x86_cpuinit_ops {
* @calibrate_tsc: calibrate TSC
* @get_wallclock: get time from HW clock like RTC etc.
* @set_wallclock: set time back to HW clock
+ * @is_untracked_pat_range exclude from PAT logic
*/
struct x86_platform_ops {
unsigned long (*calibrate_tsc)(void);
unsigned long (*get_wallclock)(void);
int (*set_wallclock)(unsigned long nowtime);
void (*iommu_shutdown)(void);
+ bool (*is_untracked_pat_range)(u64 start, u64 end);
};
extern struct x86_init_ops x86_init;
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index d5b7e90c0edf..396ff4cc8ed4 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -37,31 +37,4 @@
extern struct shared_info *HYPERVISOR_shared_info;
extern struct start_info *xen_start_info;
-enum xen_domain_type {
- XEN_NATIVE, /* running on bare hardware */
- XEN_PV_DOMAIN, /* running in a PV domain */
- XEN_HVM_DOMAIN, /* running in a Xen hvm domain */
-};
-
-#ifdef CONFIG_XEN
-extern enum xen_domain_type xen_domain_type;
-#else
-#define xen_domain_type XEN_NATIVE
-#endif
-
-#define xen_domain() (xen_domain_type != XEN_NATIVE)
-#define xen_pv_domain() (xen_domain() && \
- xen_domain_type == XEN_PV_DOMAIN)
-#define xen_hvm_domain() (xen_domain() && \
- xen_domain_type == XEN_HVM_DOMAIN)
-
-#ifdef CONFIG_XEN_DOM0
-#include <xen/interface/xen.h>
-
-#define xen_initial_domain() (xen_pv_domain() && \
- xen_start_info->flags & SIF_INITDOMAIN)
-#else /* !CONFIG_XEN_DOM0 */
-#define xen_initial_domain() (0)
-#endif /* CONFIG_XEN_DOM0 */
-
#endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 4f2e66e29ecc..d87f09bc5a52 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -89,7 +89,6 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_HPET_TIMER) += hpet.o
obj-$(CONFIG_K8_NB) += k8.o
-obj-$(CONFIG_MGEODE_LX) += geode_32.o mfgpt_32.o
obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 67e929b89875..fb1035cd9a6a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -624,6 +624,7 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table)
}
hpet_address = hpet_tbl->address.address;
+ hpet_blockid = hpet_tbl->sequence;
/*
* Some broken BIOSes advertise HPET at 0x0. We really do not
@@ -1122,7 +1123,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
if (!acpi_sci_override_gsi)
acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0);
- /* Fill in identity legacy mapings where no override */
+ /* Fill in identity legacy mappings where no override */
mp_config_acpi_legacy_irqs();
count =
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 59cdfa4686b2..2e837f5080fe 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -48,7 +48,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
* P4, Core and beyond CPUs
*/
if (c->x86_vendor == X86_VENDOR_INTEL &&
- (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 14)))
+ (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f)))
flags->bm_control = 0;
}
EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index ca93638ba430..82e508677b91 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -78,12 +78,9 @@ int acpi_save_state_mem(void)
#ifndef CONFIG_64BIT
store_gdt((struct desc_ptr *)&header->pmode_gdt);
- header->pmode_efer_low = nx_enabled;
- if (header->pmode_efer_low & 1) {
- /* This is strange, why not save efer, always? */
- rdmsr(MSR_EFER, header->pmode_efer_low,
- header->pmode_efer_high);
- }
+ if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low,
+ &header->pmode_efer_high))
+ header->pmode_efer_low = header->pmode_efer_high = 0;
#endif /* !CONFIG_64BIT */
header->pmode_cr0 = read_cr0();
@@ -119,29 +116,32 @@ void acpi_restore_state_mem(void)
/**
- * acpi_reserve_bootmem - do _very_ early ACPI initialisation
+ * acpi_reserve_wakeup_memory - do _very_ early ACPI initialisation
*
* We allocate a page from the first 1MB of memory for the wakeup
* routine for when we come back from a sleep state. The
* runtime allocator allows specification of <16MB pages, but not
* <1MB pages.
*/
-void __init acpi_reserve_bootmem(void)
+void __init acpi_reserve_wakeup_memory(void)
{
+ unsigned long mem;
+
if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) {
printk(KERN_ERR
"ACPI: Wakeup code way too big, S3 disabled.\n");
return;
}
- acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE);
+ mem = find_e820_area(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE);
- if (!acpi_realmode) {
+ if (mem == -1L) {
printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
return;
}
-
- acpi_wakeup_address = virt_to_phys((void *)acpi_realmode);
+ acpi_realmode = (unsigned long) phys_to_virt(mem);
+ acpi_wakeup_address = mem;
+ reserve_early(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP");
}
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 32fb09102a13..23824fef789c 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -19,7 +19,7 @@
#include <linux/pci.h>
#include <linux/gfp.h>
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
@@ -166,6 +166,43 @@ static void iommu_uninit_device(struct device *dev)
{
kfree(dev->archdata.iommu);
}
+
+void __init amd_iommu_uninit_devices(void)
+{
+ struct pci_dev *pdev = NULL;
+
+ for_each_pci_dev(pdev) {
+
+ if (!check_device(&pdev->dev))
+ continue;
+
+ iommu_uninit_device(&pdev->dev);
+ }
+}
+
+int __init amd_iommu_init_devices(void)
+{
+ struct pci_dev *pdev = NULL;
+ int ret = 0;
+
+ for_each_pci_dev(pdev) {
+
+ if (!check_device(&pdev->dev))
+ continue;
+
+ ret = iommu_init_device(&pdev->dev);
+ if (ret)
+ goto out_free;
+ }
+
+ return 0;
+
+out_free:
+
+ amd_iommu_uninit_devices();
+
+ return ret;
+}
#ifdef CONFIG_AMD_IOMMU_STATS
/*
@@ -1125,7 +1162,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
- iommu_area_free(range->bitmap, address, pages);
+ bitmap_clear(range->bitmap, address, pages);
}
@@ -1587,6 +1624,11 @@ static struct notifier_block device_nb = {
.notifier_call = device_change_notifier,
};
+void amd_iommu_init_notifier(void)
+{
+ bus_register_notifier(&pci_bus_type, &device_nb);
+}
+
/*****************************************************************************
*
* The next functions belong to the dma_ops mapping/unmapping code.
@@ -1783,7 +1825,7 @@ retry:
goto out;
/*
- * aperture was sucessfully enlarged by 128 MB, try
+ * aperture was successfully enlarged by 128 MB, try
* allocation again
*/
goto retry;
@@ -2145,8 +2187,6 @@ static void prealloc_protection_domains(void)
if (!check_device(&dev->dev))
continue;
- iommu_init_device(&dev->dev);
-
/* Is there already any domain for it? */
if (domain_for_device(&dev->dev))
continue;
@@ -2215,8 +2255,6 @@ int __init amd_iommu_init_dma_ops(void)
register_iommu(&amd_iommu_ops);
- bus_register_notifier(&pci_bus_type, &device_nb);
-
amd_iommu_stats_init();
return 0;
@@ -2490,7 +2528,7 @@ int __init amd_iommu_init_passthrough(void)
struct pci_dev *dev = NULL;
u16 devid;
- /* allocate passthroug domain */
+ /* allocate passthrough domain */
pt_domain = protection_domain_alloc();
if (!pt_domain)
return -ENOMEM;
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 7ffc39965233..1dca9c34eaeb 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1274,6 +1274,10 @@ static int __init amd_iommu_init(void)
if (ret)
goto free;
+ ret = amd_iommu_init_devices();
+ if (ret)
+ goto free;
+
if (iommu_pass_through)
ret = amd_iommu_init_passthrough();
else
@@ -1281,6 +1285,8 @@ static int __init amd_iommu_init(void)
if (ret)
goto free;
+ amd_iommu_init_notifier();
+
enable_iommus();
if (iommu_pass_through)
@@ -1296,6 +1302,9 @@ out:
return ret;
free:
+
+ amd_iommu_uninit_devices();
+
free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
get_order(MAX_DOMAIN_ID/8));
@@ -1336,6 +1345,9 @@ void __init amd_iommu_detect(void)
iommu_detected = 1;
amd_iommu_detected = 1;
x86_init.iommu.iommu_init = amd_iommu_init;
+
+ /* Make sure ACS will be enabled */
+ pci_request_acs();
}
}
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index e0dfb6856aa2..3704997e8b25 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -280,7 +280,8 @@ void __init early_gart_iommu_check(void)
* or BIOS forget to put that in reserved.
* try to update e820 to make that region as reserved.
*/
- int i, fix, slot;
+ u32 agp_aper_base = 0, agp_aper_order = 0;
+ int i, fix, slot, valid_agp = 0;
u32 ctl;
u32 aper_size = 0, aper_order = 0, last_aper_order = 0;
u64 aper_base = 0, last_aper_base = 0;
@@ -290,6 +291,8 @@ void __init early_gart_iommu_check(void)
return;
/* This is mostly duplicate of iommu_hole_init */
+ agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp);
+
fix = 0;
for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
int bus;
@@ -342,10 +345,10 @@ void __init early_gart_iommu_check(void)
}
}
- if (!fix)
+ if (valid_agp)
return;
- /* different nodes have different setting, disable them all at first*/
+ /* disable them all at first */
for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
int bus;
int dev_base, dev_limit;
@@ -458,8 +461,6 @@ out:
if (aper_alloc) {
/* Got the aperture from the AGP bridge */
- } else if (!valid_agp) {
- /* Do nothing */
} else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
force_iommu ||
valid_agp ||
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ad8c75b9e453..aa57c079c98f 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -647,7 +647,7 @@ static int __init calibrate_APIC_clock(void)
calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
- apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult);
+ apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
calibration_result);
@@ -1341,7 +1341,7 @@ void enable_x2apic(void)
rdmsr(MSR_IA32_APICBASE, msr, msr2);
if (!(msr & X2APIC_ENABLE)) {
- pr_info("Enabling x2apic\n");
+ printk_once(KERN_INFO "Enabling x2apic\n");
wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
}
}
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index d0c99abc26c3..eacbd2b31d27 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -306,10 +306,7 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
if (cpumask_test_cpu(cpu, cpu_online_mask))
break;
}
- if (cpu < nr_cpu_ids)
- return per_cpu(x86_cpu_to_apicid, cpu);
-
- return BAD_APICID;
+ return per_cpu(x86_cpu_to_apicid, cpu);
}
struct apic apic_physflat = {
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index d9acc3bee0f4..e31b9ffe25f5 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -127,7 +127,7 @@ static u32 noop_apic_read(u32 reg)
static void noop_apic_write(u32 reg, u32 v)
{
- WARN_ON_ONCE((cpu_has_apic || !disable_apic));
+ WARN_ON_ONCE(cpu_has_apic && !disable_apic);
}
struct apic apic_noop = {
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 38dcecfa5818..cb804c5091b9 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -131,10 +131,7 @@ static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
if (cpumask_test_cpu(cpu, cpu_online_mask))
break;
}
- if (cpu < nr_cpu_ids)
- return bigsmp_cpu_to_logical_apicid(cpu);
-
- return BAD_APICID;
+ return bigsmp_cpu_to_logical_apicid(cpu);
}
static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index e85f8fb7f8e7..dd2b5f264643 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -27,6 +27,9 @@
*
* http://www.unisys.com
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/notifier.h>
#include <linux/spinlock.h>
#include <linux/cpumask.h>
@@ -223,9 +226,9 @@ static int parse_unisys_oem(char *oemptr)
mip_addr = val;
mip = (struct mip_reg *)val;
mip_reg = __va(mip);
- pr_debug("es7000_mipcfg: host_reg = 0x%lx \n",
+ pr_debug("host_reg = 0x%lx\n",
(unsigned long)host_reg);
- pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n",
+ pr_debug("mip_reg = 0x%lx\n",
(unsigned long)mip_reg);
success++;
break;
@@ -401,7 +404,7 @@ static void es7000_enable_apic_mode(void)
if (!es7000_plat)
return;
- printk(KERN_INFO "ES7000: Enabling APIC mode.\n");
+ pr_info("Enabling APIC mode.\n");
memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
es7000_mip_reg.off_0x00 = MIP_SW_APIC;
es7000_mip_reg.off_0x38 = MIP_VALID;
@@ -514,8 +517,7 @@ static void es7000_setup_apic_routing(void)
{
int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
- printk(KERN_INFO
- "Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
+ pr_info("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
(apic_version[apic] == 0x14) ?
"Physical Cluster" : "Logical Cluster",
nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index c0b4468683f9..de00c4619a55 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2276,26 +2276,28 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
/*
* Either sets desc->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
+ * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
* leaves desc->affinity untouched.
*/
unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask,
+ unsigned int *dest_id)
{
struct irq_cfg *cfg;
unsigned int irq;
if (!cpumask_intersects(mask, cpu_online_mask))
- return BAD_APICID;
+ return -1;
irq = desc->irq;
cfg = desc->chip_data;
if (assign_irq_vector(irq, cfg, mask))
- return BAD_APICID;
+ return -1;
cpumask_copy(desc->affinity, mask);
- return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+ *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+ return 0;
}
static int
@@ -2311,12 +2313,11 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
cfg = desc->chip_data;
spin_lock_irqsave(&ioapic_lock, flags);
- dest = set_desc_affinity(desc, mask);
- if (dest != BAD_APICID) {
+ ret = set_desc_affinity(desc, mask, &dest);
+ if (!ret) {
/* Only the high 8 bits are valid. */
dest = SET_APIC_LOGICAL_ID(dest);
__target_IO_APIC_irq(irq, dest, cfg);
- ret = 0;
}
spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -2431,7 +2432,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
continue;
cfg = irq_cfg(irq);
- spin_lock(&desc->lock);
+ raw_spin_lock(&desc->lock);
if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
goto unlock;
@@ -2450,7 +2451,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
}
__get_cpu_var(vector_irq)[vector] = -1;
unlock:
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
}
irq_exit();
@@ -3267,7 +3268,8 @@ void destroy_irq(unsigned int irq)
* MSI message composition
*/
#ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
+ struct msi_msg *msg, u8 hpet_id)
{
struct irq_cfg *cfg;
int err;
@@ -3301,7 +3303,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
irte.dest_id = IRTE_DEST(dest);
/* Set source-id of interrupt request */
- set_msi_sid(&irte, pdev);
+ if (pdev)
+ set_msi_sid(&irte, pdev);
+ else
+ set_hpet_sid(&irte, hpet_id);
modify_irte(irq, &irte);
@@ -3347,8 +3352,7 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
struct msi_msg msg;
unsigned int dest;
- dest = set_desc_affinity(desc, mask);
- if (dest == BAD_APICID)
+ if (set_desc_affinity(desc, mask, &dest))
return -1;
cfg = desc->chip_data;
@@ -3380,8 +3384,7 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
if (get_irte(irq, &irte))
return -1;
- dest = set_desc_affinity(desc, mask);
- if (dest == BAD_APICID)
+ if (set_desc_affinity(desc, mask, &dest))
return -1;
irte.vector = cfg->vector;
@@ -3466,7 +3469,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
int ret;
struct msi_msg msg;
- ret = msi_compose_msg(dev, irq, &msg);
+ ret = msi_compose_msg(dev, irq, &msg, -1);
if (ret < 0)
return ret;
@@ -3563,8 +3566,7 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
struct msi_msg msg;
unsigned int dest;
- dest = set_desc_affinity(desc, mask);
- if (dest == BAD_APICID)
+ if (set_desc_affinity(desc, mask, &dest))
return -1;
cfg = desc->chip_data;
@@ -3599,7 +3601,7 @@ int arch_setup_dmar_msi(unsigned int irq)
int ret;
struct msi_msg msg;
- ret = msi_compose_msg(NULL, irq, &msg);
+ ret = msi_compose_msg(NULL, irq, &msg, -1);
if (ret < 0)
return ret;
dmar_msi_write(irq, &msg);
@@ -3619,8 +3621,7 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
struct msi_msg msg;
unsigned int dest;
- dest = set_desc_affinity(desc, mask);
- if (dest == BAD_APICID)
+ if (set_desc_affinity(desc, mask, &dest))
return -1;
cfg = desc->chip_data;
@@ -3639,6 +3640,19 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
#endif /* CONFIG_SMP */
+static struct irq_chip ir_hpet_msi_type = {
+ .name = "IR-HPET_MSI",
+ .unmask = hpet_msi_unmask,
+ .mask = hpet_msi_mask,
+#ifdef CONFIG_INTR_REMAP
+ .ack = ir_ack_apic_edge,
+#ifdef CONFIG_SMP
+ .set_affinity = ir_set_msi_irq_affinity,
+#endif
+#endif
+ .retrigger = ioapic_retrigger_irq,
+};
+
static struct irq_chip hpet_msi_type = {
.name = "HPET_MSI",
.unmask = hpet_msi_unmask,
@@ -3650,20 +3664,36 @@ static struct irq_chip hpet_msi_type = {
.retrigger = ioapic_retrigger_irq,
};
-int arch_setup_hpet_msi(unsigned int irq)
+int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
{
int ret;
struct msi_msg msg;
struct irq_desc *desc = irq_to_desc(irq);
- ret = msi_compose_msg(NULL, irq, &msg);
+ if (intr_remapping_enabled) {
+ struct intel_iommu *iommu = map_hpet_to_ir(id);
+ int index;
+
+ if (!iommu)
+ return -1;
+
+ index = alloc_irte(iommu, irq, 1);
+ if (index < 0)
+ return -1;
+ }
+
+ ret = msi_compose_msg(NULL, irq, &msg, id);
if (ret < 0)
return ret;
hpet_msi_write(irq, &msg);
desc->status |= IRQ_MOVE_PCNTXT;
- set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
- "edge");
+ if (irq_remapped(irq))
+ set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type,
+ handle_edge_irq, "edge");
+ else
+ set_irq_chip_and_handler_name(irq, &hpet_msi_type,
+ handle_edge_irq, "edge");
return 0;
}
@@ -3697,8 +3727,7 @@ static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
struct irq_cfg *cfg;
unsigned int dest;
- dest = set_desc_affinity(desc, mask);
- if (dest == BAD_APICID)
+ if (set_desc_affinity(desc, mask, &dest))
return -1;
cfg = desc->chip_data;
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index 6389432a9dbf..0159a69396cb 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -361,7 +361,7 @@ void stop_apic_nmi_watchdog(void *unused)
*/
static DEFINE_PER_CPU(unsigned, last_irq_sum);
-static DEFINE_PER_CPU(local_t, alert_counter);
+static DEFINE_PER_CPU(long, alert_counter);
static DEFINE_PER_CPU(int, nmi_touch);
void touch_nmi_watchdog(void)
@@ -438,8 +438,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
* Ayiee, looks like this CPU is stuck ...
* wait a few IRQs (5 seconds) before doing the oops ...
*/
- local_inc(&__get_cpu_var(alert_counter));
- if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz)
+ __this_cpu_inc(per_cpu_var(alert_counter));
+ if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz)
/*
* die_nmi will return ONLY if NOTIFY_STOP happens..
*/
@@ -447,7 +447,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
regs, panic_on_timeout);
} else {
__get_cpu_var(last_irq_sum) = sum;
- local_set(&__get_cpu_var(alert_counter), 0);
+ __this_cpu_write(per_cpu_var(alert_counter), 0);
}
/* see if the nmi watchdog went off */
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 07cdbdcd7a92..98c4665f251c 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -264,11 +264,6 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc)
static __init void early_check_numaq(void)
{
/*
- * Find possible boot-time SMP configuration:
- */
- early_find_smp_config();
-
- /*
* get boot-time SMP configuration:
*/
if (smp_found_config)
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index a5371ec36776..cf69c59f4910 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -148,10 +148,7 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
break;
}
- if (cpu < nr_cpu_ids)
- return per_cpu(x86_cpu_to_logical_apicid, cpu);
-
- return BAD_APICID;
+ return per_cpu(x86_cpu_to_logical_apicid, cpu);
}
static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x)
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index a8989aadc99a..8972f38c5ced 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -146,10 +146,7 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
break;
}
- if (cpu < nr_cpu_ids)
- return per_cpu(x86_cpu_to_apicid, cpu);
-
- return BAD_APICID;
+ return per_cpu(x86_cpu_to_apicid, cpu);
}
static unsigned int x2apic_phys_get_apic_id(unsigned long x)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 130c4b934877..d56b0efb2057 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -30,10 +30,22 @@
#include <asm/apic.h>
#include <asm/ipi.h>
#include <asm/smp.h>
+#include <asm/x86_init.h>
DEFINE_PER_CPU(int, x2apic_extra_bits);
static enum uv_system_type uv_system_type;
+static u64 gru_start_paddr, gru_end_paddr;
+
+static inline bool is_GRU_range(u64 start, u64 end)
+{
+ return start >= gru_start_paddr && end <= gru_end_paddr;
+}
+
+static bool uv_is_untracked_pat_range(u64 start, u64 end)
+{
+ return is_ISA_range(start, end) || is_GRU_range(start, end);
+}
static int early_get_nodeid(void)
{
@@ -49,6 +61,7 @@ static int early_get_nodeid(void)
static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
if (!strcmp(oem_id, "SGI")) {
+ x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
if (!strcmp(oem_table_id, "UVL"))
uv_system_type = UV_LEGACY_APIC;
else if (!strcmp(oem_table_id, "UVX"))
@@ -212,10 +225,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
if (cpumask_test_cpu(cpu, cpu_online_mask))
break;
}
- if (cpu < nr_cpu_ids)
- return per_cpu(x86_cpu_to_apicid, cpu);
-
- return BAD_APICID;
+ return per_cpu(x86_cpu_to_apicid, cpu);
}
static unsigned int x2apic_get_apic_id(unsigned long x)
@@ -385,8 +395,12 @@ static __init void map_gru_high(int max_pnode)
int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
- if (gru.s.enable)
+ if (gru.s.enable) {
map_high("GRU", gru.s.base, shift, max_pnode, map_wb);
+ gru_start_paddr = ((u64)gru.s.base << shift);
+ gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
+
+ }
}
static __init void map_mmr_high(int max_pnode)
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index 63a88e1f987d..b0206a211b09 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -101,21 +101,17 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
}
int
-uv_bios_mq_watchlist_alloc(int blade, unsigned long addr, unsigned int mq_size,
+uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size,
unsigned long *intr_mmr_offset)
{
- union uv_watchlist_u size_blade;
u64 watchlist;
s64 ret;
- size_blade.size = mq_size;
- size_blade.blade = blade;
-
/*
* bios returns watchlist number or negative error number.
*/
ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr,
- size_blade.val, (u64)intr_mmr_offset,
+ mq_size, (u64)intr_mmr_offset,
(u64)&watchlist, 0);
if (ret < BIOS_STATUS_SUCCESS)
return ret;
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index c965e5212714..468489b57aae 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -74,6 +74,7 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
unsigned int eax, ebx, ecx, edx, sub_index;
unsigned int ht_mask_width, core_plus_mask_width;
unsigned int core_select_mask, core_level_siblings;
+ static bool printed;
if (c->cpuid_level < 0xb)
return;
@@ -127,12 +128,14 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
c->x86_max_cores = (core_level_siblings / smp_num_siblings);
-
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
- c->phys_proc_id);
- if (c->x86_max_cores > 1)
- printk(KERN_INFO "CPU: Processor Core ID: %d\n",
- c->cpu_core_id);
+ if (!printed) {
+ printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
+ c->phys_proc_id);
+ if (c->x86_max_cores > 1)
+ printk(KERN_INFO "CPU: Processor Core ID: %d\n",
+ c->cpu_core_id);
+ printed = 1;
+ }
return;
#endif
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7128b3799cec..e485825130d2 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -254,59 +254,36 @@ static int __cpuinit nearby_node(int apicid)
/*
* Fixup core topology information for AMD multi-node processors.
- * Assumption 1: Number of cores in each internal node is the same.
- * Assumption 2: Mixed systems with both single-node and dual-node
- * processors are not supported.
+ * Assumption: Number of cores in each internal node is the same.
*/
#ifdef CONFIG_X86_HT
static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c)
{
-#ifdef CONFIG_PCI
- u32 t, cpn;
- u8 n, n_id;
+ unsigned long long value;
+ u32 nodes, cores_per_node;
int cpu = smp_processor_id();
+ if (!cpu_has(c, X86_FEATURE_NODEID_MSR))
+ return;
+
/* fixup topology information only once for a core */
if (cpu_has(c, X86_FEATURE_AMD_DCM))
return;
- /* check for multi-node processor on boot cpu */
- t = read_pci_config(0, 24, 3, 0xe8);
- if (!(t & (1 << 29)))
+ rdmsrl(MSR_FAM10H_NODE_ID, value);
+
+ nodes = ((value >> 3) & 7) + 1;
+ if (nodes == 1)
return;
set_cpu_cap(c, X86_FEATURE_AMD_DCM);
+ cores_per_node = c->x86_max_cores / nodes;
- /* cores per node: each internal node has half the number of cores */
- cpn = c->x86_max_cores >> 1;
-
- /* even-numbered NB_id of this dual-node processor */
- n = c->phys_proc_id << 1;
-
- /*
- * determine internal node id and assign cores fifty-fifty to
- * each node of the dual-node processor
- */
- t = read_pci_config(0, 24 + n, 3, 0xe8);
- n = (t>>30) & 0x3;
- if (n == 0) {
- if (c->cpu_core_id < cpn)
- n_id = 0;
- else
- n_id = 1;
- } else {
- if (c->cpu_core_id < cpn)
- n_id = 1;
- else
- n_id = 0;
- }
-
- /* compute entire NodeID, use llc_shared_map to store sibling info */
- per_cpu(cpu_llc_id, cpu) = (c->phys_proc_id << 1) + n_id;
+ /* store NodeID, use llc_shared_map to store sibling info */
+ per_cpu(cpu_llc_id, cpu) = value & 7;
- /* fixup core id to be in range from 0 to cpn */
- c->cpu_core_id = c->cpu_core_id % cpn;
-#endif
+ /* fixup core id to be in range from 0 to (cores_per_node - 1) */
+ c->cpu_core_id = c->cpu_core_id % cores_per_node;
}
#endif
@@ -375,8 +352,6 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
node = nearby_node(apicid);
}
numa_set_node(cpu, node);
-
- printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
#endif
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a4ec8b647544..4868e4a951ee 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -427,6 +427,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_HT
u32 eax, ebx, ecx, edx;
int index_msb, core_bits;
+ static bool printed;
if (!cpu_has(c, X86_FEATURE_HT))
return;
@@ -442,7 +443,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
smp_num_siblings = (ebx & 0xff0000) >> 16;
if (smp_num_siblings == 1) {
- printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
+ printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n");
goto out;
}
@@ -469,11 +470,12 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
((1 << core_bits) - 1);
out:
- if ((c->x86_max_cores * smp_num_siblings) > 1) {
+ if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) {
printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
c->phys_proc_id);
printk(KERN_INFO "CPU: Processor Core ID: %d\n",
c->cpu_core_id);
+ printed = 1;
}
#endif
}
@@ -1093,7 +1095,7 @@ static void clear_all_debug_regs(void)
void __cpuinit cpu_init(void)
{
- struct orig_ist *orig_ist;
+ struct orig_ist *oist;
struct task_struct *me;
struct tss_struct *t;
unsigned long v;
@@ -1102,7 +1104,7 @@ void __cpuinit cpu_init(void)
cpu = stack_smp_processor_id();
t = &per_cpu(init_tss, cpu);
- orig_ist = &per_cpu(orig_ist, cpu);
+ oist = &per_cpu(orig_ist, cpu);
#ifdef CONFIG_NUMA
if (cpu != 0 && percpu_read(node_number) == 0 &&
@@ -1115,7 +1117,7 @@ void __cpuinit cpu_init(void)
if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask))
panic("CPU#%d already initialized!\n", cpu);
- printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+ pr_debug("Initializing CPU#%d\n", cpu);
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
@@ -1136,19 +1138,19 @@ void __cpuinit cpu_init(void)
wrmsrl(MSR_KERNEL_GS_BASE, 0);
barrier();
- check_efer();
+ x86_configure_nx();
if (cpu != 0)
enable_x2apic();
/*
* set up and load the per-CPU TSS
*/
- if (!orig_ist->ist[0]) {
+ if (!oist->ist[0]) {
char *estacks = per_cpu(exception_stacks, cpu);
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
estacks += exception_stack_sizes[v];
- orig_ist->ist[v] = t->x86_tss.ist[v] =
+ oist->ist[v] = t->x86_tss.ist[v] =
(unsigned long)estacks;
}
}
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
index dca325c03999..b368cd862997 100644
--- a/arch/x86/kernel/cpu/cpu_debug.c
+++ b/arch/x86/kernel/cpu/cpu_debug.c
@@ -30,9 +30,9 @@
#include <asm/apic.h>
#include <asm/desc.h>
-static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpu_arr);
-static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], priv_arr);
-static DEFINE_PER_CPU(int, cpu_priv_count);
+static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpud_arr);
+static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], cpud_priv_arr);
+static DEFINE_PER_CPU(int, cpud_priv_count);
static DEFINE_MUTEX(cpu_debug_lock);
@@ -531,7 +531,7 @@ static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg,
/* Already intialized */
if (file == CPU_INDEX_BIT)
- if (per_cpu(cpu_arr[type].init, cpu))
+ if (per_cpu(cpud_arr[type].init, cpu))
return 0;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
@@ -543,8 +543,8 @@ static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg,
priv->reg = reg;
priv->file = file;
mutex_lock(&cpu_debug_lock);
- per_cpu(priv_arr[type], cpu) = priv;
- per_cpu(cpu_priv_count, cpu)++;
+ per_cpu(cpud_priv_arr[type], cpu) = priv;
+ per_cpu(cpud_priv_count, cpu)++;
mutex_unlock(&cpu_debug_lock);
if (file)
@@ -552,10 +552,10 @@ static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg,
dentry, (void *)priv, &cpu_fops);
else {
debugfs_create_file(cpu_base[type].name, S_IRUGO,
- per_cpu(cpu_arr[type].dentry, cpu),
+ per_cpu(cpud_arr[type].dentry, cpu),
(void *)priv, &cpu_fops);
mutex_lock(&cpu_debug_lock);
- per_cpu(cpu_arr[type].init, cpu) = 1;
+ per_cpu(cpud_arr[type].init, cpu) = 1;
mutex_unlock(&cpu_debug_lock);
}
@@ -615,7 +615,7 @@ static int cpu_init_allreg(unsigned cpu, struct dentry *dentry)
if (!is_typeflag_valid(cpu, cpu_base[type].flag))
continue;
cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry);
- per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry;
+ per_cpu(cpud_arr[type].dentry, cpu) = cpu_dentry;
if (type < CPU_TSS_BIT)
err = cpu_init_msr(cpu, type, cpu_dentry);
@@ -647,11 +647,11 @@ static int cpu_init_cpu(void)
err = cpu_init_allreg(cpu, cpu_dentry);
pr_info("cpu%d(%d) debug files %d\n",
- cpu, nr_cpu_ids, per_cpu(cpu_priv_count, cpu));
- if (per_cpu(cpu_priv_count, cpu) > MAX_CPU_FILES) {
+ cpu, nr_cpu_ids, per_cpu(cpud_priv_count, cpu));
+ if (per_cpu(cpud_priv_count, cpu) > MAX_CPU_FILES) {
pr_err("Register files count %d exceeds limit %d\n",
- per_cpu(cpu_priv_count, cpu), MAX_CPU_FILES);
- per_cpu(cpu_priv_count, cpu) = MAX_CPU_FILES;
+ per_cpu(cpud_priv_count, cpu), MAX_CPU_FILES);
+ per_cpu(cpud_priv_count, cpu) = MAX_CPU_FILES;
err = -ENFILE;
}
if (err)
@@ -676,8 +676,8 @@ static void __exit cpu_debug_exit(void)
debugfs_remove_recursive(cpu_debugfs_dir);
for (cpu = 0; cpu < nr_cpu_ids; cpu++)
- for (i = 0; i < per_cpu(cpu_priv_count, cpu); i++)
- kfree(per_cpu(priv_arr[i], cpu));
+ for (i = 0; i < per_cpu(cpud_priv_count, cpu); i++)
+ kfree(per_cpu(cpud_priv_arr[i], cpu));
}
module_init(cpu_debug_init);
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8b581d3905cb..f28decf8dde3 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -68,9 +68,9 @@ struct acpi_cpufreq_data {
unsigned int cpu_feature;
};
-static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
+static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);
-static DEFINE_PER_CPU(struct aperfmperf, old_perf);
+static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf);
/* acpi_perf_data is a pointer to percpu data. */
static struct acpi_processor_performance *acpi_perf_data;
@@ -214,14 +214,14 @@ static u32 get_cur_val(const struct cpumask *mask)
if (unlikely(cpumask_empty(mask)))
return 0;
- switch (per_cpu(drv_data, cpumask_first(mask))->cpu_feature) {
+ switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
case SYSTEM_INTEL_MSR_CAPABLE:
cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
break;
case SYSTEM_IO_CAPABLE:
cmd.type = SYSTEM_IO_CAPABLE;
- perf = per_cpu(drv_data, cpumask_first(mask))->acpi_data;
+ perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
cmd.addr.io.port = perf->control_register.address;
cmd.addr.io.bit_width = perf->control_register.bit_width;
break;
@@ -268,8 +268,8 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy,
if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
return 0;
- ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf);
- per_cpu(old_perf, cpu) = perf;
+ ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf);
+ per_cpu(acfreq_old_perf, cpu) = perf;
retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
@@ -278,7 +278,7 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy,
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
- struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
+ struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
unsigned int freq;
unsigned int cached_freq;
@@ -322,7 +322,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
static int acpi_cpufreq_target(struct cpufreq_policy *policy,
unsigned int target_freq, unsigned int relation)
{
- struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+ struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
struct acpi_processor_performance *perf;
struct cpufreq_freqs freqs;
struct drv_cmd cmd;
@@ -416,7 +416,7 @@ out:
static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
{
- struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+ struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
dprintk("acpi_cpufreq_verify\n");
@@ -574,7 +574,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
return -ENOMEM;
data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
- per_cpu(drv_data, cpu) = data;
+ per_cpu(acfreq_data, cpu) = data;
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
@@ -725,20 +725,20 @@ err_unreg:
acpi_processor_unregister_performance(perf, cpu);
err_free:
kfree(data);
- per_cpu(drv_data, cpu) = NULL;
+ per_cpu(acfreq_data, cpu) = NULL;
return result;
}
static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
- struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+ struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
dprintk("acpi_cpufreq_cpu_exit\n");
if (data) {
cpufreq_frequency_table_put_attr(policy->cpu);
- per_cpu(drv_data, policy->cpu) = NULL;
+ per_cpu(acfreq_data, policy->cpu) = NULL;
acpi_processor_unregister_performance(data->acpi_data,
policy->cpu);
kfree(data);
@@ -749,7 +749,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
{
- struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+ struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
dprintk("acpi_cpufreq_resume\n");
@@ -764,14 +764,15 @@ static struct freq_attr *acpi_cpufreq_attr[] = {
};
static struct cpufreq_driver acpi_cpufreq_driver = {
- .verify = acpi_cpufreq_verify,
- .target = acpi_cpufreq_target,
- .init = acpi_cpufreq_cpu_init,
- .exit = acpi_cpufreq_cpu_exit,
- .resume = acpi_cpufreq_resume,
- .name = "acpi-cpufreq",
- .owner = THIS_MODULE,
- .attr = acpi_cpufreq_attr,
+ .verify = acpi_cpufreq_verify,
+ .target = acpi_cpufreq_target,
+ .bios_limit = acpi_processor_get_bios_limit,
+ .init = acpi_cpufreq_cpu_init,
+ .exit = acpi_cpufreq_cpu_exit,
+ .resume = acpi_cpufreq_resume,
+ .name = "acpi-cpufreq",
+ .owner = THIS_MODULE,
+ .attr = acpi_cpufreq_attr,
};
static int __init acpi_cpufreq_init(void)
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index cabd2fa3fc93..7e7eea4f8261 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -885,7 +885,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
/* Find ACPI data for processor */
acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
- ACPI_UINT32_MAX, &longhaul_walk_callback,
+ ACPI_UINT32_MAX, &longhaul_walk_callback, NULL,
NULL, (void *)&pr);
/* Check ACPI support for C3 state */
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index f10dea409f40..cb01dac267d3 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -164,7 +164,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
}
/* cpuinfo and default policy values */
- policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+ policy->cpuinfo.transition_latency = 200000;
policy->cur = busfreq * max_multiplier;
result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index d47c775eb0ab..9a97116f89e5 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -714,14 +714,17 @@ static struct freq_attr *powernow_table_attr[] = {
};
static struct cpufreq_driver powernow_driver = {
- .verify = powernow_verify,
- .target = powernow_target,
- .get = powernow_get,
- .init = powernow_cpu_init,
- .exit = powernow_cpu_exit,
- .name = "powernow-k7",
- .owner = THIS_MODULE,
- .attr = powernow_table_attr,
+ .verify = powernow_verify,
+ .target = powernow_target,
+ .get = powernow_get,
+#ifdef CONFIG_X86_POWERNOW_K7_ACPI
+ .bios_limit = acpi_processor_get_bios_limit,
+#endif
+ .init = powernow_cpu_init,
+ .exit = powernow_cpu_exit,
+ .name = "powernow-k7",
+ .owner = THIS_MODULE,
+ .attr = powernow_table_attr,
};
static int __init powernow_init(void)
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 3f12dabeab52..f125e5c551c0 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1118,7 +1118,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data,
static int powernowk8_target(struct cpufreq_policy *pol,
unsigned targfreq, unsigned relation)
{
- cpumask_t oldmask;
+ cpumask_var_t oldmask;
struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
u32 checkfid;
u32 checkvid;
@@ -1131,9 +1131,13 @@ static int powernowk8_target(struct cpufreq_policy *pol,
checkfid = data->currfid;
checkvid = data->currvid;
- /* only run on specific CPU from here on */
- oldmask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu));
+ /* only run on specific CPU from here on. */
+ /* This is poor form: use a workqueue or smp_call_function_single */
+ if (!alloc_cpumask_var(&oldmask, GFP_KERNEL))
+ return -ENOMEM;
+
+ cpumask_copy(oldmask, tsk_cpus_allowed(current));
+ set_cpus_allowed_ptr(current, cpumask_of(pol->cpu));
if (smp_processor_id() != pol->cpu) {
printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1193,7 +1197,8 @@ static int powernowk8_target(struct cpufreq_policy *pol,
ret = 0;
err_out:
- set_cpus_allowed_ptr(current, &oldmask);
+ set_cpus_allowed_ptr(current, oldmask);
+ free_cpumask_var(oldmask);
return ret;
}
@@ -1393,14 +1398,15 @@ static struct freq_attr *powernow_k8_attr[] = {
};
static struct cpufreq_driver cpufreq_amd64_driver = {
- .verify = powernowk8_verify,
- .target = powernowk8_target,
- .init = powernowk8_cpu_init,
- .exit = __devexit_p(powernowk8_cpu_exit),
- .get = powernowk8_get,
- .name = "powernow-k8",
- .owner = THIS_MODULE,
- .attr = powernow_k8_attr,
+ .verify = powernowk8_verify,
+ .target = powernowk8_target,
+ .bios_limit = acpi_processor_get_bios_limit,
+ .init = powernowk8_cpu_init,
+ .exit = __devexit_p(powernowk8_cpu_exit),
+ .get = powernowk8_get,
+ .name = "powernow-k8",
+ .owner = THIS_MODULE,
+ .attr = powernow_k8_attr,
};
/* driver entry point for init */
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 3ae5a7a3a500..2ce8e0b5cc54 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -39,7 +39,7 @@ static struct pci_dev *speedstep_chipset_dev;
/* speedstep_processor
*/
-static unsigned int speedstep_processor;
+static enum speedstep_processor speedstep_processor;
static u32 pmbase;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index f4c290b8482f..ad0083abfa23 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -34,7 +34,7 @@ static int relaxed_check;
* GET PROCESSOR CORE SPEED IN KHZ *
*********************************************************************/
-static unsigned int pentium3_get_frequency(unsigned int processor)
+static unsigned int pentium3_get_frequency(enum speedstep_processor processor)
{
/* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
struct {
@@ -227,7 +227,7 @@ static unsigned int pentium4_get_frequency(void)
/* Warning: may get called from smp_call_function_single. */
-unsigned int speedstep_get_frequency(unsigned int processor)
+unsigned int speedstep_get_frequency(enum speedstep_processor processor)
{
switch (processor) {
case SPEEDSTEP_CPU_PCORE:
@@ -380,7 +380,7 @@ EXPORT_SYMBOL_GPL(speedstep_detect_processor);
* DETECT SPEEDSTEP SPEEDS *
*********************************************************************/
-unsigned int speedstep_get_freqs(unsigned int processor,
+unsigned int speedstep_get_freqs(enum speedstep_processor processor,
unsigned int *low_speed,
unsigned int *high_speed,
unsigned int *transition_latency,
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
index 2b6c04e5a304..70d9cea1219d 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
@@ -11,18 +11,18 @@
/* processors */
-
-#define SPEEDSTEP_CPU_PIII_C_EARLY 0x00000001 /* Coppermine core */
-#define SPEEDSTEP_CPU_PIII_C 0x00000002 /* Coppermine core */
-#define SPEEDSTEP_CPU_PIII_T 0x00000003 /* Tualatin core */
-#define SPEEDSTEP_CPU_P4M 0x00000004 /* P4-M */
-
+enum speedstep_processor {
+ SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001, /* Coppermine core */
+ SPEEDSTEP_CPU_PIII_C = 0x00000002, /* Coppermine core */
+ SPEEDSTEP_CPU_PIII_T = 0x00000003, /* Tualatin core */
+ SPEEDSTEP_CPU_P4M = 0x00000004, /* P4-M */
/* the following processors are not speedstep-capable and are not auto-detected
* in speedstep_detect_processor(). However, their speed can be detected using
* the speedstep_get_frequency() call. */
-#define SPEEDSTEP_CPU_PM 0xFFFFFF03 /* Pentium M */
-#define SPEEDSTEP_CPU_P4D 0xFFFFFF04 /* desktop P4 */
-#define SPEEDSTEP_CPU_PCORE 0xFFFFFF05 /* Core */
+ SPEEDSTEP_CPU_PM = 0xFFFFFF03, /* Pentium M */
+ SPEEDSTEP_CPU_P4D = 0xFFFFFF04, /* desktop P4 */
+ SPEEDSTEP_CPU_PCORE = 0xFFFFFF05, /* Core */
+};
/* speedstep states -- only two of them */
@@ -31,10 +31,10 @@
/* detect a speedstep-capable processor */
-extern unsigned int speedstep_detect_processor (void);
+extern enum speedstep_processor speedstep_detect_processor(void);
/* detect the current speed (in khz) of the processor */
-extern unsigned int speedstep_get_frequency(unsigned int processor);
+extern unsigned int speedstep_get_frequency(enum speedstep_processor processor);
/* detect the low and high speeds of the processor. The callback
@@ -42,7 +42,7 @@ extern unsigned int speedstep_get_frequency(unsigned int processor);
* SPEEDSTEP_LOW; the second argument is zero so that no
* cpufreq_notify_transition calls are initiated.
*/
-extern unsigned int speedstep_get_freqs(unsigned int processor,
+extern unsigned int speedstep_get_freqs(enum speedstep_processor processor,
unsigned int *low_speed,
unsigned int *high_speed,
unsigned int *transition_latency,
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
index befea088e4f5..04d73c114e49 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -35,7 +35,7 @@ static int smi_cmd;
static unsigned int smi_sig;
/* info about the processor */
-static unsigned int speedstep_processor;
+static enum speedstep_processor speedstep_processor;
/*
* There are only two frequency states for each processor. Values
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 40e1835b35e8..879666f4d871 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -70,7 +70,6 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
if (c->x86_power & (1 << 8)) {
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
- set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
sched_clock_stable = 1;
}
@@ -263,11 +262,13 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
/* Don't do the funky fallback heuristics the AMD version employs
for now. */
node = apicid_to_node[apicid];
- if (node == NUMA_NO_NODE || !node_online(node))
+ if (node == NUMA_NO_NODE)
node = first_node(node_online_map);
+ else if (!node_online(node)) {
+ /* reuse the value from init_cpu_to_node() */
+ node = cpu_to_node(cpu);
+ }
numa_set_node(cpu, node);
-
- printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
#endif
}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 0df4c2b7107f..fc6c8ef92dcc 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -94,7 +94,7 @@ static const struct _cache_table __cpuinitconst cache_table[] =
{ 0xd1, LVL_3, 1024 }, /* 4-way set assoc, 64 byte line size */
{ 0xd2, LVL_3, 2048 }, /* 4-way set assoc, 64 byte line size */
{ 0xd6, LVL_3, 1024 }, /* 8-way set assoc, 64 byte line size */
- { 0xd7, LVL_3, 2038 }, /* 8-way set assoc, 64 byte line size */
+ { 0xd7, LVL_3, 2048 }, /* 8-way set assoc, 64 byte line size */
{ 0xd8, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */
{ 0xdc, LVL_3, 2048 }, /* 12-way set assoc, 64 byte line size */
{ 0xdd, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */
@@ -102,6 +102,9 @@ static const struct _cache_table __cpuinitconst cache_table[] =
{ 0xe2, LVL_3, 2048 }, /* 16-way set assoc, 64 byte line size */
{ 0xe3, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */
{ 0xe4, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */
+ { 0xea, LVL_3, 12288 }, /* 24-way set assoc, 64 byte line size */
+ { 0xeb, LVL_3, 18432 }, /* 24-way set assoc, 64 byte line size */
+ { 0xec, LVL_3, 24576 }, /* 24-way set assoc, 64 byte line size */
{ 0x00, 0, 0}
};
@@ -496,26 +499,27 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
#ifdef CONFIG_SYSFS
/* pointer to _cpuid4_info array (for each cache leaf) */
-static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
-#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
+static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
+#define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y]))
#ifdef CONFIG_SMP
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
struct _cpuid4_info *this_leaf, *sibling_leaf;
unsigned long num_threads_sharing;
- int index_msb, i;
+ int index_msb, i, sibling;
struct cpuinfo_x86 *c = &cpu_data(cpu);
if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
- struct cpuinfo_x86 *d;
- for_each_online_cpu(i) {
- if (!per_cpu(cpuid4_info, i))
+ for_each_cpu(i, c->llc_shared_map) {
+ if (!per_cpu(ici_cpuid4_info, i))
continue;
- d = &cpu_data(i);
this_leaf = CPUID4_INFO_IDX(i, index);
- cpumask_copy(to_cpumask(this_leaf->shared_cpu_map),
- d->llc_shared_map);
+ for_each_cpu(sibling, c->llc_shared_map) {
+ if (!cpu_online(sibling))
+ continue;
+ set_bit(sibling, this_leaf->shared_cpu_map);
+ }
}
return;
}
@@ -532,7 +536,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
c->apicid >> index_msb) {
cpumask_set_cpu(i,
to_cpumask(this_leaf->shared_cpu_map));
- if (i != cpu && per_cpu(cpuid4_info, i)) {
+ if (i != cpu && per_cpu(ici_cpuid4_info, i)) {
sibling_leaf =
CPUID4_INFO_IDX(i, index);
cpumask_set_cpu(cpu, to_cpumask(
@@ -571,8 +575,8 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
for (i = 0; i < num_cache_leaves; i++)
cache_remove_shared_cpu_map(cpu, i);
- kfree(per_cpu(cpuid4_info, cpu));
- per_cpu(cpuid4_info, cpu) = NULL;
+ kfree(per_cpu(ici_cpuid4_info, cpu));
+ per_cpu(ici_cpuid4_info, cpu) = NULL;
}
static int
@@ -611,15 +615,15 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
if (num_cache_leaves == 0)
return -ENOENT;
- per_cpu(cpuid4_info, cpu) = kzalloc(
+ per_cpu(ici_cpuid4_info, cpu) = kzalloc(
sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
- if (per_cpu(cpuid4_info, cpu) == NULL)
+ if (per_cpu(ici_cpuid4_info, cpu) == NULL)
return -ENOMEM;
smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
if (retval) {
- kfree(per_cpu(cpuid4_info, cpu));
- per_cpu(cpuid4_info, cpu) = NULL;
+ kfree(per_cpu(ici_cpuid4_info, cpu));
+ per_cpu(ici_cpuid4_info, cpu) = NULL;
}
return retval;
@@ -631,7 +635,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
/* pointer to kobject for cpuX/cache */
-static DEFINE_PER_CPU(struct kobject *, cache_kobject);
+static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
struct _index_kobject {
struct kobject kobj;
@@ -640,8 +644,8 @@ struct _index_kobject {
};
/* pointer to array of kobjects for cpuX/cache/indexY */
-static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
-#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))
+static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
+#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y]))
#define show_one_plus(file_name, object, val) \
static ssize_t show_##file_name \
@@ -860,10 +864,10 @@ static struct kobj_type ktype_percpu_entry = {
static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
{
- kfree(per_cpu(cache_kobject, cpu));
- kfree(per_cpu(index_kobject, cpu));
- per_cpu(cache_kobject, cpu) = NULL;
- per_cpu(index_kobject, cpu) = NULL;
+ kfree(per_cpu(ici_cache_kobject, cpu));
+ kfree(per_cpu(ici_index_kobject, cpu));
+ per_cpu(ici_cache_kobject, cpu) = NULL;
+ per_cpu(ici_index_kobject, cpu) = NULL;
free_cache_attributes(cpu);
}
@@ -879,14 +883,14 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
return err;
/* Allocate all required memory */
- per_cpu(cache_kobject, cpu) =
+ per_cpu(ici_cache_kobject, cpu) =
kzalloc(sizeof(struct kobject), GFP_KERNEL);
- if (unlikely(per_cpu(cache_kobject, cpu) == NULL))
+ if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
goto err_out;
- per_cpu(index_kobject, cpu) = kzalloc(
+ per_cpu(ici_index_kobject, cpu) = kzalloc(
sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
- if (unlikely(per_cpu(index_kobject, cpu) == NULL))
+ if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
goto err_out;
return 0;
@@ -910,7 +914,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
if (unlikely(retval < 0))
return retval;
- retval = kobject_init_and_add(per_cpu(cache_kobject, cpu),
+ retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
&ktype_percpu_entry,
&sys_dev->kobj, "%s", "cache");
if (retval < 0) {
@@ -924,12 +928,12 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
this_object->index = i;
retval = kobject_init_and_add(&(this_object->kobj),
&ktype_cache,
- per_cpu(cache_kobject, cpu),
+ per_cpu(ici_cache_kobject, cpu),
"index%1lu", i);
if (unlikely(retval)) {
for (j = 0; j < i; j++)
kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
- kobject_put(per_cpu(cache_kobject, cpu));
+ kobject_put(per_cpu(ici_cache_kobject, cpu));
cpuid4_cache_sysfs_exit(cpu);
return retval;
}
@@ -937,7 +941,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
}
cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
- kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
+ kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
return 0;
}
@@ -946,7 +950,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
unsigned int cpu = sys_dev->id;
unsigned long i;
- if (per_cpu(cpuid4_info, cpu) == NULL)
+ if (per_cpu(ici_cpuid4_info, cpu) == NULL)
return;
if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
return;
@@ -954,7 +958,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
for (i = 0; i < num_cache_leaves; i++)
kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
- kobject_put(per_cpu(cache_kobject, cpu));
+ kobject_put(per_cpu(ici_cache_kobject, cpu));
cpuid4_cache_sysfs_exit(cpu);
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 472763d92098..73734baa50f2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -74,7 +74,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
m->finished = 0;
}
-static cpumask_t mce_inject_cpumask;
+static cpumask_var_t mce_inject_cpumask;
static int mce_raise_notify(struct notifier_block *self,
unsigned long val, void *data)
@@ -82,9 +82,9 @@ static int mce_raise_notify(struct notifier_block *self,
struct die_args *args = (struct die_args *)data;
int cpu = smp_processor_id();
struct mce *m = &__get_cpu_var(injectm);
- if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask))
+ if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
return NOTIFY_DONE;
- cpu_clear(cpu, mce_inject_cpumask);
+ cpumask_clear_cpu(cpu, mce_inject_cpumask);
if (m->inject_flags & MCJ_EXCEPTION)
raise_exception(m, args->regs);
else if (m->status)
@@ -148,22 +148,22 @@ static void raise_mce(struct mce *m)
unsigned long start;
int cpu;
get_online_cpus();
- mce_inject_cpumask = cpu_online_map;
- cpu_clear(get_cpu(), mce_inject_cpumask);
+ cpumask_copy(mce_inject_cpumask, cpu_online_mask);
+ cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
for_each_online_cpu(cpu) {
struct mce *mcpu = &per_cpu(injectm, cpu);
if (!mcpu->finished ||
MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
- cpu_clear(cpu, mce_inject_cpumask);
+ cpumask_clear_cpu(cpu, mce_inject_cpumask);
}
- if (!cpus_empty(mce_inject_cpumask))
- apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR);
+ if (!cpumask_empty(mce_inject_cpumask))
+ apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR);
start = jiffies;
- while (!cpus_empty(mce_inject_cpumask)) {
+ while (!cpumask_empty(mce_inject_cpumask)) {
if (!time_before(jiffies, start + 2*HZ)) {
printk(KERN_ERR
"Timeout waiting for mce inject NMI %lx\n",
- *cpus_addr(mce_inject_cpumask));
+ *cpumask_bits(mce_inject_cpumask));
break;
}
cpu_relax();
@@ -210,6 +210,8 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
static int inject_init(void)
{
+ if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
+ return -ENOMEM;
printk(KERN_INFO "Machine check injector initialized\n");
mce_chrdev_ops.write = mce_write;
register_die_notifier(&mce_raise_nb);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 0bcaa3875863..a8aacd4b513c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1388,13 +1388,14 @@ static void __mcheck_cpu_init_timer(void)
struct timer_list *t = &__get_cpu_var(mce_timer);
int *n = &__get_cpu_var(mce_next_interval);
+ setup_timer(t, mce_start_timer, smp_processor_id());
+
if (mce_ignore_ce)
return;
*n = check_interval * HZ;
if (!*n)
return;
- setup_timer(t, mce_start_timer, smp_processor_id());
t->expires = round_jiffies(jiffies + *n);
add_timer_on(t, smp_processor_id());
}
@@ -1928,7 +1929,7 @@ error2:
sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr);
error:
while (--i >= 0)
- sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
+ sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
sysdev_unregister(&per_cpu(mce_dev, cpu));
@@ -2016,9 +2017,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
- t->expires = round_jiffies(jiffies +
+ if (!mce_ignore_ce && check_interval) {
+ t->expires = round_jiffies(jiffies +
__get_cpu_var(mce_next_interval));
- add_timer_on(t, cpu);
+ add_timer_on(t, cpu);
+ }
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
break;
case CPU_POST_DEAD:
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 4fef985fc221..81c499eceb21 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -256,6 +256,16 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
ack_APIC_irq();
}
+/* Thermal monitoring depends on APIC, ACPI and clock modulation */
+static int intel_thermal_supported(struct cpuinfo_x86 *c)
+{
+ if (!cpu_has_apic)
+ return 0;
+ if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
+ return 0;
+ return 1;
+}
+
void __init mcheck_intel_therm_init(void)
{
/*
@@ -263,8 +273,7 @@ void __init mcheck_intel_therm_init(void)
* LVT value on BSP and use that value to restore APs' thermal LVT
* entry BIOS programmed later
*/
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) &&
- cpu_has(&boot_cpu_data, X86_FEATURE_ACC))
+ if (intel_thermal_supported(&boot_cpu_data))
lvtthmr_init = apic_read(APIC_LVTTHMR);
}
@@ -274,8 +283,7 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
int tm2 = 0;
u32 l, h;
- /* Thermal monitoring depends on ACPI and clock modulation*/
- if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
+ if (!intel_thermal_supported(c))
return;
/*
@@ -339,8 +347,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
- printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
- cpu, tm2 ? "TM2" : "TM1");
+ printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
+ tm2 ? "TM2" : "TM1");
/* enable thermal throttle processing */
atomic_set(&therm_throt_en, 1);
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 73c86db5acbe..09b1698e0466 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -170,6 +170,41 @@ static int __init cmp_range(const void *x1, const void *x2)
return start1 - start2;
}
+static int __init clean_sort_range(struct res_range *range, int az)
+{
+ int i, j, k = az - 1, nr_range = 0;
+
+ for (i = 0; i < k; i++) {
+ if (range[i].end)
+ continue;
+ for (j = k; j > i; j--) {
+ if (range[j].end) {
+ k = j;
+ break;
+ }
+ }
+ if (j == i)
+ break;
+ range[i].start = range[k].start;
+ range[i].end = range[k].end;
+ range[k].start = 0;
+ range[k].end = 0;
+ k--;
+ }
+ /* count it */
+ for (i = 0; i < az; i++) {
+ if (!range[i].end) {
+ nr_range = i;
+ break;
+ }
+ }
+
+ /* sort them */
+ sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+
+ return nr_range;
+}
+
#define BIOS_BUG_MSG KERN_WARNING \
"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
@@ -223,22 +258,18 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
subtract_range(range, extra_remove_base,
extra_remove_base + extra_remove_size - 1);
- /* get new range num */
- nr_range = 0;
- for (i = 0; i < RANGE_NUM; i++) {
- if (!range[i].end)
- continue;
- nr_range++;
- }
if (debug_print) {
printk(KERN_DEBUG "After UC checking\n");
- for (i = 0; i < nr_range; i++)
+ for (i = 0; i < RANGE_NUM; i++) {
+ if (!range[i].end)
+ continue;
printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
range[i].start, range[i].end + 1);
+ }
}
/* sort the ranges */
- sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+ nr_range = clean_sort_range(range, RANGE_NUM);
if (debug_print) {
printk(KERN_DEBUG "After sorting\n");
for (i = 0; i < nr_range; i++)
@@ -689,8 +720,6 @@ static int __init mtrr_need_cleanup(void)
continue;
if (!size)
type = MTRR_NUM_TYPES;
- if (type == MTRR_TYPE_WRPROT)
- type = MTRR_TYPE_UNCACHABLE;
num[type]++;
}
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 3c1b12d461d1..e006e56f699c 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -4,6 +4,7 @@
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/ctype.h>
+#include <linux/string.h>
#include <linux/init.h>
#define LINE_SIZE 80
@@ -133,8 +134,7 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
return -EINVAL;
base = simple_strtoull(line + 5, &ptr, 0);
- while (isspace(*ptr))
- ptr++;
+ ptr = skip_spaces(ptr);
if (strncmp(ptr, "size=", 5))
return -EINVAL;
@@ -142,14 +142,11 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
size = simple_strtoull(ptr + 5, &ptr, 0);
if ((base & 0xfff) || (size & 0xfff))
return -EINVAL;
- while (isspace(*ptr))
- ptr++;
+ ptr = skip_spaces(ptr);
if (strncmp(ptr, "type=", 5))
return -EINVAL;
- ptr += 5;
- while (isspace(*ptr))
- ptr++;
+ ptr = skip_spaces(ptr + 5);
for (i = 0; i < MTRR_NUM_TYPES; ++i) {
if (strcmp(ptr, mtrr_strings[i]))
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index c1bbed1021d9..c223b7e895d9 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1286,7 +1286,7 @@ x86_perf_event_set_period(struct perf_event *event,
return 0;
/*
- * If we are way outside a reasoable range then just skip forward:
+ * If we are way outside a reasonable range then just skip forward:
*/
if (unlikely(left <= -period)) {
left = period;
@@ -1632,6 +1632,7 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
data.period = event->hw.last_period;
data.addr = 0;
+ data.raw = NULL;
regs.ip = 0;
/*
@@ -1749,6 +1750,7 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
u64 val;
data.addr = 0;
+ data.raw = NULL;
cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1794,6 +1796,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
u64 ack, status;
data.addr = 0;
+ data.raw = NULL;
cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1857,6 +1860,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
u64 val;
data.addr = 0;
+ data.raw = NULL;
cpuc = &__get_cpu_var(cpu_hw_events);
@@ -2062,12 +2066,6 @@ static __init int p6_pmu_init(void)
x86_pmu = p6_pmu;
- if (!cpu_has_apic) {
- pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
- pr_info("no hardware sampling interrupt available.\n");
- x86_pmu.apic = 0;
- }
-
return 0;
}
@@ -2159,6 +2157,16 @@ static __init int amd_pmu_init(void)
return 0;
}
+static void __init pmu_check_apic(void)
+{
+ if (cpu_has_apic)
+ return;
+
+ x86_pmu.apic = 0;
+ pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
+ pr_info("no hardware sampling interrupt available.\n");
+}
+
void __init init_hw_perf_events(void)
{
int err;
@@ -2180,6 +2188,8 @@ void __init init_hw_perf_events(void)
return;
}
+ pmu_check_apic();
+
pr_cont("%s PMU driver.\n", x86_pmu.name);
if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
@@ -2287,7 +2297,7 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
-static DEFINE_PER_CPU(int, in_nmi_frame);
+static DEFINE_PER_CPU(int, in_ignored_frame);
static void
@@ -2303,8 +2313,9 @@ static void backtrace_warning(void *data, char *msg)
static int backtrace_stack(void *data, char *name)
{
- per_cpu(in_nmi_frame, smp_processor_id()) =
- x86_is_stack_id(NMI_STACK, name);
+ per_cpu(in_ignored_frame, smp_processor_id()) =
+ x86_is_stack_id(NMI_STACK, name) ||
+ x86_is_stack_id(DEBUG_STACK, name);
return 0;
}
@@ -2313,7 +2324,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
{
struct perf_callchain_entry *entry = data;
- if (per_cpu(in_nmi_frame, smp_processor_id()))
+ if (per_cpu(in_ignored_frame, smp_processor_id()))
return;
if (reliable)
@@ -2325,6 +2336,7 @@ static const struct stacktrace_ops backtrace_ops = {
.warning_symbol = backtrace_warning_symbol,
.stack = backtrace_stack,
.address = backtrace_address,
+ .walk_stack = print_context_stack_bp,
};
#include "../dumpstack.h"
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 7ef24a796992..cb27fd6136c9 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -187,7 +187,8 @@ static int __init cpuid_init(void)
int i, err = 0;
i = 0;
- if (register_chrdev(CPUID_MAJOR, "cpu/cpuid", &cpuid_fops)) {
+ if (__register_chrdev(CPUID_MAJOR, 0, NR_CPUS,
+ "cpu/cpuid", &cpuid_fops)) {
printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n",
CPUID_MAJOR);
err = -EBUSY;
@@ -216,7 +217,7 @@ out_class:
}
class_destroy(cpuid_class);
out_chrdev:
- unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
+ __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
out:
return err;
}
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index ef42a038f1a6..1c47390dd0e5 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -265,13 +265,13 @@ struct ds_context {
int cpu;
};
-static DEFINE_PER_CPU(struct ds_context *, cpu_context);
+static DEFINE_PER_CPU(struct ds_context *, cpu_ds_context);
static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
{
struct ds_context **p_context =
- (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu));
+ (task ? &task->thread.ds_ctx : &per_cpu(cpu_ds_context, cpu));
struct ds_context *context = NULL;
struct ds_context *new_context = NULL;
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index b8ce165dde5d..c56bc2873030 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -109,6 +109,30 @@ print_context_stack(struct thread_info *tinfo,
}
return bp;
}
+EXPORT_SYMBOL_GPL(print_context_stack);
+
+unsigned long
+print_context_stack_bp(struct thread_info *tinfo,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data,
+ unsigned long *end, int *graph)
+{
+ struct stack_frame *frame = (struct stack_frame *)bp;
+ unsigned long *ret_addr = &frame->return_address;
+
+ while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) {
+ unsigned long addr = *ret_addr;
+
+ if (__kernel_text_address(addr)) {
+ ops->address(data, addr, 1);
+ frame = frame->next_frame;
+ ret_addr = &frame->return_address;
+ print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
+ }
+ }
+ return (unsigned long)frame;
+}
+EXPORT_SYMBOL_GPL(print_context_stack_bp);
static void
@@ -141,10 +165,11 @@ static void print_trace_address(void *data, unsigned long addr, int reliable)
}
static const struct stacktrace_ops print_trace_ops = {
- .warning = print_trace_warning,
- .warning_symbol = print_trace_warning_symbol,
- .stack = print_trace_stack,
- .address = print_trace_address,
+ .warning = print_trace_warning,
+ .warning_symbol = print_trace_warning_symbol,
+ .stack = print_trace_stack,
+ .address = print_trace_address,
+ .walk_stack = print_context_stack,
};
void
@@ -188,7 +213,7 @@ void dump_stack(void)
}
EXPORT_SYMBOL(dump_stack);
-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static int die_owner = -1;
static unsigned int die_nest_count;
@@ -207,11 +232,11 @@ unsigned __kprobes long oops_begin(void)
/* racy, but better than risking deadlock. */
raw_local_irq_save(flags);
cpu = smp_processor_id();
- if (!__raw_spin_trylock(&die_lock)) {
+ if (!arch_spin_trylock(&die_lock)) {
if (cpu == die_owner)
/* nested oops. should stop eventually */;
else
- __raw_spin_lock(&die_lock);
+ arch_spin_lock(&die_lock);
}
die_nest_count++;
die_owner = cpu;
@@ -231,7 +256,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
die_nest_count--;
if (!die_nest_count)
/* Nest count reaches zero, release the lock. */
- __raw_spin_unlock(&die_lock);
+ arch_spin_unlock(&die_lock);
raw_local_irq_restore(flags);
oops_exit();
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
index 81086c227ab7..4fd1420faffa 100644
--- a/arch/x86/kernel/dumpstack.h
+++ b/arch/x86/kernel/dumpstack.h
@@ -14,12 +14,6 @@
#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
#endif
-extern unsigned long
-print_context_stack(struct thread_info *tinfo,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data,
- unsigned long *end, int *graph);
-
extern void
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp, char *log_lvl);
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index e0ed4c7abb62..ae775ca47b25 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -58,7 +58,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
context = (struct thread_info *)
((unsigned long)stack & (~(THREAD_SIZE - 1)));
- bp = print_context_stack(context, stack, bp, ops, data, NULL, &graph);
+ bp = ops->walk_stack(context, stack, bp, ops, data, NULL, &graph);
stack = (unsigned long *)context->previous_esp;
if (!stack)
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 8e740934bd1f..0ad9597073f5 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -103,6 +103,35 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
return NULL;
}
+static inline int
+in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
+ unsigned long *irq_stack_end)
+{
+ return (stack >= irq_stack && stack < irq_stack_end);
+}
+
+/*
+ * We are returning from the irq stack and go to the previous one.
+ * If the previous stack is also in the irq stack, then bp in the first
+ * frame of the irq stack points to the previous, interrupted one.
+ * Otherwise we have another level of indirection: We first save
+ * the bp of the previous stack, then we switch the stack to the irq one
+ * and save a new bp that links to the previous one.
+ * (See save_args())
+ */
+static inline unsigned long
+fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
+ unsigned long *irq_stack, unsigned long *irq_stack_end)
+{
+#ifdef CONFIG_FRAME_POINTER
+ struct stack_frame *frame = (struct stack_frame *)bp;
+
+ if (!in_irq_stack(stack, irq_stack, irq_stack_end))
+ return (unsigned long)frame->next_frame;
+#endif
+ return bp;
+}
+
/*
* x86-64 can have up to three kernel stacks:
* process stack
@@ -159,8 +188,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
if (ops->stack(data, id) < 0)
break;
- bp = print_context_stack(tinfo, stack, bp, ops,
- data, estack_end, &graph);
+ bp = ops->walk_stack(tinfo, stack, bp, ops,
+ data, estack_end, &graph);
ops->stack(data, "<EOE>");
/*
* We link to the next stack via the
@@ -175,7 +204,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
irq_stack = irq_stack_end -
(IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
- if (stack >= irq_stack && stack < irq_stack_end) {
+ if (in_irq_stack(stack, irq_stack, irq_stack_end)) {
if (ops->stack(data, "IRQ") < 0)
break;
bp = print_context_stack(tinfo, stack, bp,
@@ -186,6 +215,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
* pointer (index -1 to end) in the IRQ stack:
*/
stack = (unsigned long *) (irq_stack_end[-1]);
+ bp = fixup_bp_irq_link(bp, stack, irq_stack,
+ irq_stack_end);
irq_stack_end = NULL;
ops->stack(data, "EOI");
continue;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d17d482a04f4..05ed7ab2ca48 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -724,7 +724,7 @@ core_initcall(e820_mark_nvs_memory);
/*
* Early reserved memory areas.
*/
-#define MAX_EARLY_RES 20
+#define MAX_EARLY_RES 32
struct early_res {
u64 start, end;
@@ -732,7 +732,16 @@ struct early_res {
char overlap_ok;
};
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
- { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
+ { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */
+#ifdef CONFIG_X86_32
+ /*
+ * But first pinch a few for the stack/trampoline stuff
+ * FIXME: Don't need the extra page at 4K, but need to fix
+ * trampoline before removing it. (see the GDT stuff)
+ */
+ { PAGE_SIZE, PAGE_SIZE, "EX TRAMPOLINE", 1 },
+#endif
+
{}
};
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 50b9c220e121..44a8e0dc6737 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -725,22 +725,61 @@ END(syscall_badsys)
/*
* System calls that need a pt_regs pointer.
*/
-#define PTREGSCALL(name) \
+#define PTREGSCALL0(name) \
ALIGN; \
ptregs_##name: \
leal 4(%esp),%eax; \
jmp sys_##name;
-PTREGSCALL(iopl)
-PTREGSCALL(fork)
-PTREGSCALL(clone)
-PTREGSCALL(vfork)
-PTREGSCALL(execve)
-PTREGSCALL(sigaltstack)
-PTREGSCALL(sigreturn)
-PTREGSCALL(rt_sigreturn)
-PTREGSCALL(vm86)
-PTREGSCALL(vm86old)
+#define PTREGSCALL1(name) \
+ ALIGN; \
+ptregs_##name: \
+ leal 4(%esp),%edx; \
+ movl (PT_EBX+4)(%esp),%eax; \
+ jmp sys_##name;
+
+#define PTREGSCALL2(name) \
+ ALIGN; \
+ptregs_##name: \
+ leal 4(%esp),%ecx; \
+ movl (PT_ECX+4)(%esp),%edx; \
+ movl (PT_EBX+4)(%esp),%eax; \
+ jmp sys_##name;
+
+#define PTREGSCALL3(name) \
+ ALIGN; \
+ptregs_##name: \
+ leal 4(%esp),%eax; \
+ pushl %eax; \
+ movl PT_EDX(%eax),%ecx; \
+ movl PT_ECX(%eax),%edx; \
+ movl PT_EBX(%eax),%eax; \
+ call sys_##name; \
+ addl $4,%esp; \
+ ret
+
+PTREGSCALL1(iopl)
+PTREGSCALL0(fork)
+PTREGSCALL0(vfork)
+PTREGSCALL3(execve)
+PTREGSCALL2(sigaltstack)
+PTREGSCALL0(sigreturn)
+PTREGSCALL0(rt_sigreturn)
+PTREGSCALL2(vm86)
+PTREGSCALL1(vm86old)
+
+/* Clone is an oddball. The 4th arg is in %edi */
+ ALIGN;
+ptregs_clone:
+ leal 4(%esp),%eax
+ pushl %eax
+ pushl PT_EDI(%eax)
+ movl PT_EDX(%eax),%ecx
+ movl PT_ECX(%eax),%edx
+ movl PT_EBX(%eax),%eax
+ call sys_clone
+ addl $8,%esp
+ ret
.macro FIXUP_ESPFIX_STACK
/*
@@ -1008,12 +1047,8 @@ END(spurious_interrupt_bug)
ENTRY(kernel_thread_helper)
pushl $0 # fake return address for unwinder
CFI_STARTPROC
- movl %edx,%eax
- push %edx
- CFI_ADJUST_CFA_OFFSET 4
- call *%ebx
- push %eax
- CFI_ADJUST_CFA_OFFSET 4
+ movl %edi,%eax
+ call *%esi
call do_exit
ud2 # padding for call trace
CFI_ENDPROC
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 4deb8fc849dd..0697ff139837 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -977,8 +977,8 @@ apicinterrupt UV_BAU_MESSAGE \
#endif
apicinterrupt LOCAL_TIMER_VECTOR \
apic_timer_interrupt smp_apic_timer_interrupt
-apicinterrupt GENERIC_INTERRUPT_VECTOR \
- generic_interrupt smp_generic_interrupt
+apicinterrupt X86_PLATFORM_IPI_VECTOR \
+ x86_platform_ipi smp_x86_platform_ipi
#ifdef CONFIG_SMP
apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
@@ -1076,10 +1076,10 @@ ENTRY(\sym)
TRACE_IRQS_OFF
movq %rsp,%rdi /* pt_regs pointer */
xorl %esi,%esi /* no error code */
- PER_CPU(init_tss, %rbp)
- subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
+ PER_CPU(init_tss, %r12)
+ subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12)
call \do_sym
- addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
+ addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12)
jmp paranoid_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
END(\sym)
@@ -1166,63 +1166,20 @@ bad_gs:
jmp 2b
.previous
-/*
- * Create a kernel thread.
- *
- * C extern interface:
- * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
- *
- * asm input arguments:
- * rdi: fn, rsi: arg, rdx: flags
- */
-ENTRY(kernel_thread)
- CFI_STARTPROC
- FAKE_STACK_FRAME $child_rip
- SAVE_ALL
-
- # rdi: flags, rsi: usp, rdx: will be &pt_regs
- movq %rdx,%rdi
- orq kernel_thread_flags(%rip),%rdi
- movq $-1, %rsi
- movq %rsp, %rdx
-
- xorl %r8d,%r8d
- xorl %r9d,%r9d
-
- # clone now
- call do_fork
- movq %rax,RAX(%rsp)
- xorl %edi,%edi
-
- /*
- * It isn't worth to check for reschedule here,
- * so internally to the x86_64 port you can rely on kernel_thread()
- * not to reschedule the child before returning, this avoids the need
- * of hacks for example to fork off the per-CPU idle tasks.
- * [Hopefully no generic code relies on the reschedule -AK]
- */
- RESTORE_ALL
- UNFAKE_STACK_FRAME
- ret
- CFI_ENDPROC
-END(kernel_thread)
-
-ENTRY(child_rip)
+ENTRY(kernel_thread_helper)
pushq $0 # fake return address
CFI_STARTPROC
/*
* Here we are in the child and the registers are set as they were
* at kernel_thread() invocation in the parent.
*/
- movq %rdi, %rax
- movq %rsi, %rdi
- call *%rax
+ call *%rsi
# exit
mov %eax, %edi
call do_exit
ud2 # padding for call trace
CFI_ENDPROC
-END(child_rip)
+END(kernel_thread_helper)
/*
* execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 5a1b9758fd62..309689245431 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -189,9 +189,26 @@ static void wait_for_nmi(void)
nmi_wait_count++;
}
+static inline int
+within(unsigned long addr, unsigned long start, unsigned long end)
+{
+ return addr >= start && addr < end;
+}
+
static int
do_ftrace_mod_code(unsigned long ip, void *new_code)
{
+ /*
+ * On x86_64, kernel text mappings are mapped read-only with
+ * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
+ * of the kernel text mapping to modify the kernel text.
+ *
+ * For 32bit kernels, these mappings are same and we can use
+ * kernel identity mapping to modify code.
+ */
+ if (within(ip, (unsigned long)_text, (unsigned long)_etext))
+ ip = (unsigned long)__va(__pa(ip));
+
mod_code_ip = (void *)ip;
mod_code_newcode = new_code;
diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c
deleted file mode 100644
index 9b08e852fd1a..000000000000
--- a/arch/x86/kernel/geode_32.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * AMD Geode southbridge support code
- * Copyright (C) 2006, Advanced Micro Devices, Inc.
- * Copyright (C) 2007, Andres Salomon <dilinger@debian.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/ioport.h>
-#include <linux/io.h>
-#include <asm/msr.h>
-#include <asm/geode.h>
-
-static struct {
- char *name;
- u32 msr;
- int size;
- u32 base;
-} lbars[] = {
- { "geode-pms", MSR_LBAR_PMS, LBAR_PMS_SIZE, 0 },
- { "geode-acpi", MSR_LBAR_ACPI, LBAR_ACPI_SIZE, 0 },
- { "geode-gpio", MSR_LBAR_GPIO, LBAR_GPIO_SIZE, 0 },
- { "geode-mfgpt", MSR_LBAR_MFGPT, LBAR_MFGPT_SIZE, 0 }
-};
-
-static void __init init_lbars(void)
-{
- u32 lo, hi;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(lbars); i++) {
- rdmsr(lbars[i].msr, lo, hi);
- if (hi & 0x01)
- lbars[i].base = lo & 0x0000ffff;
-
- if (lbars[i].base == 0)
- printk(KERN_ERR "geode: Couldn't initialize '%s'\n",
- lbars[i].name);
- }
-}
-
-int geode_get_dev_base(unsigned int dev)
-{
- BUG_ON(dev >= ARRAY_SIZE(lbars));
- return lbars[dev].base;
-}
-EXPORT_SYMBOL_GPL(geode_get_dev_base);
-
-/* === GPIO API === */
-
-void geode_gpio_set(u32 gpio, unsigned int reg)
-{
- u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
-
- if (!base)
- return;
-
- /* low bank register */
- if (gpio & 0xFFFF)
- outl(gpio & 0xFFFF, base + reg);
- /* high bank register */
- gpio >>= 16;
- if (gpio)
- outl(gpio, base + 0x80 + reg);
-}
-EXPORT_SYMBOL_GPL(geode_gpio_set);
-
-void geode_gpio_clear(u32 gpio, unsigned int reg)
-{
- u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
-
- if (!base)
- return;
-
- /* low bank register */
- if (gpio & 0xFFFF)
- outl((gpio & 0xFFFF) << 16, base + reg);
- /* high bank register */
- gpio &= (0xFFFF << 16);
- if (gpio)
- outl(gpio, base + 0x80 + reg);
-}
-EXPORT_SYMBOL_GPL(geode_gpio_clear);
-
-int geode_gpio_isset(u32 gpio, unsigned int reg)
-{
- u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
- u32 val;
-
- if (!base)
- return 0;
-
- /* low bank register */
- if (gpio & 0xFFFF) {
- val = inl(base + reg) & (gpio & 0xFFFF);
- if ((gpio & 0xFFFF) == val)
- return 1;
- }
- /* high bank register */
- gpio >>= 16;
- if (gpio) {
- val = inl(base + 0x80 + reg) & gpio;
- if (gpio == val)
- return 1;
- }
- return 0;
-}
-EXPORT_SYMBOL_GPL(geode_gpio_isset);
-
-void geode_gpio_set_irq(unsigned int group, unsigned int irq)
-{
- u32 lo, hi;
-
- if (group > 7 || irq > 15)
- return;
-
- rdmsr(MSR_PIC_ZSEL_HIGH, lo, hi);
-
- lo &= ~(0xF << (group * 4));
- lo |= (irq & 0xF) << (group * 4);
-
- wrmsr(MSR_PIC_ZSEL_HIGH, lo, hi);
-}
-EXPORT_SYMBOL_GPL(geode_gpio_set_irq);
-
-void geode_gpio_setup_event(unsigned int gpio, int pair, int pme)
-{
- u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
- u32 offset, shift, val;
-
- if (gpio >= 24)
- offset = GPIO_MAP_W;
- else if (gpio >= 16)
- offset = GPIO_MAP_Z;
- else if (gpio >= 8)
- offset = GPIO_MAP_Y;
- else
- offset = GPIO_MAP_X;
-
- shift = (gpio % 8) * 4;
-
- val = inl(base + offset);
-
- /* Clear whatever was there before */
- val &= ~(0xF << shift);
-
- /* And set the new value */
-
- val |= ((pair & 7) << shift);
-
- /* Set the PME bit if this is a PME event */
-
- if (pme)
- val |= (1 << (shift + 3));
-
- outl(val, base + offset);
-}
-EXPORT_SYMBOL_GPL(geode_gpio_setup_event);
-
-int geode_has_vsa2(void)
-{
- static int has_vsa2 = -1;
-
- if (has_vsa2 == -1) {
- u16 val;
-
- /*
- * The VSA has virtual registers that we can query for a
- * signature.
- */
- outw(VSA_VR_UNLOCK, VSA_VRC_INDEX);
- outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX);
-
- val = inw(VSA_VRC_DATA);
- has_vsa2 = (val == AMD_VSA_SIG || val == GSW_VSA_SIG);
- }
-
- return has_vsa2;
-}
-EXPORT_SYMBOL_GPL(geode_has_vsa2);
-
-static int __init geode_southbridge_init(void)
-{
- if (!is_geode())
- return -ENODEV;
-
- init_lbars();
- (void) mfgpt_timer_setup();
- return 0;
-}
-
-postcore_initcall(geode_southbridge_init);
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 4f8e2507e8f3..5051b94c9069 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -29,8 +29,6 @@ static void __init i386_default_early_setup(void)
void __init i386_start_kernel(void)
{
- reserve_trampoline_memory();
-
reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 0b06cd778fd9..b5a9896ca1e7 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -98,8 +98,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
{
copy_bootdata(__va(real_mode_data));
- reserve_trampoline_memory();
-
reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 050c278481b1..7fd318bac59c 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -18,6 +18,8 @@
#include <asm/asm-offsets.h>
#include <asm/setup.h>
#include <asm/processor-flags.h>
+#include <asm/msr-index.h>
+#include <asm/cpufeature.h>
#include <asm/percpu.h>
/* Physical address */
@@ -297,25 +299,27 @@ ENTRY(startup_32_smp)
orl %edx,%eax
movl %eax,%cr4
- btl $5, %eax # check if PAE is enabled
- jnc 6f
+ testb $X86_CR4_PAE, %al # check if PAE is enabled
+ jz 6f
/* Check if extended functions are implemented */
movl $0x80000000, %eax
cpuid
- cmpl $0x80000000, %eax
- jbe 6f
+ /* Value must be in the range 0x80000001 to 0x8000ffff */
+ subl $0x80000001, %eax
+ cmpl $(0x8000ffff-0x80000001), %eax
+ ja 6f
mov $0x80000001, %eax
cpuid
/* Execute Disable bit supported? */
- btl $20, %edx
+ btl $(X86_FEATURE_NX & 31), %edx
jnc 6f
/* Setup EFER (Extended Feature Enable Register) */
- movl $0xc0000080, %ecx
+ movl $MSR_EFER, %ecx
rdmsr
- btsl $11, %eax
+ btsl $_EFER_NX, %eax
/* Make changes effective */
wrmsr
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 22db86a37643..2d8b5035371c 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -262,11 +262,11 @@ ENTRY(secondary_startup_64)
.quad x86_64_start_kernel
ENTRY(initial_gs)
.quad INIT_PER_CPU_VAR(irq_stack_union)
- __FINITDATA
ENTRY(stack_start)
.quad init_thread_union+THREAD_SIZE-8
.word 0
+ __FINITDATA
bad_address:
jmp bad_address
@@ -340,6 +340,7 @@ ENTRY(name)
i = i + 1 ; \
.endr
+ .data
/*
* This default setting generates an ident mapping at address 0x100000
* and a mapping for the kernel that precisely maps virtual address
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index dedc2bddf7a5..ba6e65884603 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -33,6 +33,7 @@
* HPET address is set in acpi/boot.c, when an ACPI entry exists
*/
unsigned long hpet_address;
+u8 hpet_blockid; /* OS timer block num */
#ifdef CONFIG_PCI_MSI
static unsigned long hpet_num_timers;
#endif
@@ -47,12 +48,12 @@ struct hpet_dev {
char name[10];
};
-unsigned long hpet_readl(unsigned long a)
+inline unsigned int hpet_readl(unsigned int a)
{
return readl(hpet_virt_address + a);
}
-static inline void hpet_writel(unsigned long d, unsigned long a)
+static inline void hpet_writel(unsigned int d, unsigned int a)
{
writel(d, hpet_virt_address + a);
}
@@ -167,7 +168,7 @@ do { \
static void hpet_reserve_msi_timers(struct hpet_data *hd);
-static void hpet_reserve_platform_timers(unsigned long id)
+static void hpet_reserve_platform_timers(unsigned int id)
{
struct hpet __iomem *hpet = hpet_virt_address;
struct hpet_timer __iomem *timer = &hpet->hpet_timers[2];
@@ -205,7 +206,7 @@ static void hpet_reserve_platform_timers(unsigned long id)
}
#else
-static void hpet_reserve_platform_timers(unsigned long id) { }
+static void hpet_reserve_platform_timers(unsigned int id) { }
#endif
/*
@@ -246,7 +247,7 @@ static void hpet_reset_counter(void)
static void hpet_start_counter(void)
{
- unsigned long cfg = hpet_readl(HPET_CFG);
+ unsigned int cfg = hpet_readl(HPET_CFG);
cfg |= HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
}
@@ -271,7 +272,7 @@ static void hpet_resume_counter(void)
static void hpet_enable_legacy_int(void)
{
- unsigned long cfg = hpet_readl(HPET_CFG);
+ unsigned int cfg = hpet_readl(HPET_CFG);
cfg |= HPET_CFG_LEGACY;
hpet_writel(cfg, HPET_CFG);
@@ -314,7 +315,7 @@ static int hpet_setup_msi_irq(unsigned int irq);
static void hpet_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt, int timer)
{
- unsigned long cfg, cmp, now;
+ unsigned int cfg, cmp, now;
uint64_t delta;
switch (mode) {
@@ -323,7 +324,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
delta >>= evt->shift;
now = hpet_readl(HPET_COUNTER);
- cmp = now + (unsigned long) delta;
+ cmp = now + (unsigned int) delta;
cfg = hpet_readl(HPET_Tn_CFG(timer));
/* Make sure we use edge triggered interrupts */
cfg &= ~HPET_TN_LEVEL;
@@ -339,7 +340,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
* (See AMD-8111 HyperTransport I/O Hub Data Sheet,
* Publication # 24674)
*/
- hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
+ hpet_writel((unsigned int) delta, HPET_Tn_CMP(timer));
hpet_start_counter();
hpet_print_config();
break;
@@ -383,13 +384,24 @@ static int hpet_next_event(unsigned long delta,
hpet_writel(cnt, HPET_Tn_CMP(timer));
/*
- * We need to read back the CMP register to make sure that
- * what we wrote hit the chip before we compare it to the
- * counter.
+ * We need to read back the CMP register on certain HPET
+ * implementations (ATI chipsets) which seem to delay the
+ * transfer of the compare register into the internal compare
+ * logic. With small deltas this might actually be too late as
+ * the counter could already be higher than the compare value
+ * at that point and we would wait for the next hpet interrupt
+ * forever. We found out that reading the CMP register back
+ * forces the transfer so we can rely on the comparison with
+ * the counter register below. If the read back from the
+ * compare register does not match the value we programmed
+ * then we might have a real hardware problem. We can not do
+ * much about it here, but at least alert the user/admin with
+ * a prominent warning.
*/
- WARN_ON_ONCE((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt);
+ WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt,
+ KERN_WARNING "hpet: compare register read back failed.\n");
- return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
+ return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
}
static void hpet_legacy_set_mode(enum clock_event_mode mode,
@@ -415,7 +427,7 @@ static struct hpet_dev *hpet_devs;
void hpet_msi_unmask(unsigned int irq)
{
struct hpet_dev *hdev = get_irq_data(irq);
- unsigned long cfg;
+ unsigned int cfg;
/* unmask it */
cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
@@ -425,7 +437,7 @@ void hpet_msi_unmask(unsigned int irq)
void hpet_msi_mask(unsigned int irq)
{
- unsigned long cfg;
+ unsigned int cfg;
struct hpet_dev *hdev = get_irq_data(irq);
/* mask it */
@@ -467,7 +479,7 @@ static int hpet_msi_next_event(unsigned long delta,
static int hpet_setup_msi_irq(unsigned int irq)
{
- if (arch_setup_hpet_msi(irq)) {
+ if (arch_setup_hpet_msi(irq, hpet_blockid)) {
destroy_irq(irq);
return -EINVAL;
}
@@ -584,6 +596,8 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
unsigned int num_timers_used = 0;
int i;
+ if (boot_cpu_has(X86_FEATURE_ARAT))
+ return;
id = hpet_readl(HPET_ID);
num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
@@ -598,7 +612,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) {
struct hpet_dev *hdev = &hpet_devs[num_timers_used];
- unsigned long cfg = hpet_readl(HPET_Tn_CFG(i));
+ unsigned int cfg = hpet_readl(HPET_Tn_CFG(i));
/* Only consider HPET timer with MSI support */
if (!(cfg & HPET_TN_FSB_CAP))
@@ -813,7 +827,7 @@ static int hpet_clocksource_register(void)
*/
int __init hpet_enable(void)
{
- unsigned long id;
+ unsigned int id;
int i;
if (!is_hpet_capable())
@@ -872,10 +886,8 @@ int __init hpet_enable(void)
if (id & HPET_ID_LEGSUP) {
hpet_legacy_clockevent_register();
- hpet_msi_capability_lookup(2);
return 1;
}
- hpet_msi_capability_lookup(0);
return 0;
out_nohpet:
@@ -908,9 +920,17 @@ static __init int hpet_late_init(void)
if (!hpet_virt_address)
return -ENODEV;
+ if (hpet_readl(HPET_ID) & HPET_ID_LEGSUP)
+ hpet_msi_capability_lookup(2);
+ else
+ hpet_msi_capability_lookup(0);
+
hpet_reserve_platform_timers(hpet_readl(HPET_ID));
hpet_print_config();
+ if (boot_cpu_has(X86_FEATURE_ARAT))
+ return 0;
+
for_each_online_cpu(cpu) {
hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu);
}
@@ -925,7 +945,7 @@ fs_initcall(hpet_late_init);
void hpet_disable(void)
{
if (is_hpet_capable()) {
- unsigned long cfg = hpet_readl(HPET_CFG);
+ unsigned int cfg = hpet_readl(HPET_CFG);
if (hpet_legacy_int_enabled) {
cfg &= ~HPET_CFG_LEGACY;
@@ -965,8 +985,8 @@ static int hpet_prev_update_sec;
static struct rtc_time hpet_alarm_time;
static unsigned long hpet_pie_count;
static u32 hpet_t1_cmp;
-static unsigned long hpet_default_delta;
-static unsigned long hpet_pie_delta;
+static u32 hpet_default_delta;
+static u32 hpet_pie_delta;
static unsigned long hpet_pie_limit;
static rtc_irq_handler irq_handler;
@@ -1017,7 +1037,8 @@ EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler);
*/
int hpet_rtc_timer_init(void)
{
- unsigned long cfg, cnt, delta, flags;
+ unsigned int cfg, cnt, delta;
+ unsigned long flags;
if (!is_hpet_enabled())
return 0;
@@ -1027,7 +1048,7 @@ int hpet_rtc_timer_init(void)
clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC;
clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT;
- hpet_default_delta = (unsigned long) clc;
+ hpet_default_delta = clc;
}
if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit)
@@ -1113,7 +1134,7 @@ int hpet_set_periodic_freq(unsigned long freq)
clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC;
do_div(clc, freq);
clc >>= hpet_clockevent.shift;
- hpet_pie_delta = (unsigned long) clc;
+ hpet_pie_delta = clc;
}
return 1;
}
@@ -1127,7 +1148,7 @@ EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq);
static void hpet_rtc_timer_reinit(void)
{
- unsigned long cfg, delta;
+ unsigned int cfg, delta;
int lost_ints = -1;
if (unlikely(!hpet_rtc_flags)) {
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index d42f65ac4927..05d5fec64a94 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -362,8 +362,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
return ret;
}
- if (bp->callback)
- ret = arch_store_info(bp);
+ ret = arch_store_info(bp);
if (ret < 0)
return ret;
@@ -519,7 +518,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
break;
}
- (bp->callback)(bp, args->regs);
+ perf_bp_event(bp, args->regs);
rcu_read_unlock();
}
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 99c4d308f16b..8eec0ec59af2 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -103,9 +103,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
* on system-call entry - see also fork() and the signal handling
* code.
*/
-static int do_iopl(unsigned int level, struct pt_regs *regs)
+long sys_iopl(unsigned int level, struct pt_regs *regs)
{
unsigned int old = (regs->flags >> 12) & 3;
+ struct thread_struct *t = &current->thread;
if (level > 3)
return -EINVAL;
@@ -115,29 +116,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs)
return -EPERM;
}
regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12);
-
- return 0;
-}
-
-#ifdef CONFIG_X86_32
-long sys_iopl(struct pt_regs *regs)
-{
- unsigned int level = regs->bx;
- struct thread_struct *t = &current->thread;
- int rc;
-
- rc = do_iopl(level, regs);
- if (rc < 0)
- goto out;
-
t->iopl = level << 12;
set_iopl_mask(t->iopl);
-out:
- return rc;
-}
-#else
-asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs)
-{
- return do_iopl(level, regs);
+
+ return 0;
}
-#endif
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index fee6cc2b2079..91fd0c70a18a 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -18,7 +18,7 @@
atomic_t irq_err_count;
/* Function pointer for generic interrupt vector handling */
-void (*generic_interrupt_extension)(void) = NULL;
+void (*x86_platform_ipi_callback)(void) = NULL;
/*
* 'what should we do if we get a hw irq event on an illegal vector'.
@@ -72,10 +72,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
seq_printf(p, " Performance pending work\n");
#endif
- if (generic_interrupt_extension) {
+ if (x86_platform_ipi_callback) {
seq_printf(p, "%*s: ", prec, "PLT");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->generic_irqs);
+ seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
seq_printf(p, " Platform interrupts\n");
}
#ifdef CONFIG_SMP
@@ -149,7 +149,7 @@ int show_interrupts(struct seq_file *p, void *v)
if (!desc)
return 0;
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
for_each_online_cpu(j)
any_count |= kstat_irqs_cpu(i, j);
action = desc->action;
@@ -170,7 +170,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
out:
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
@@ -187,8 +187,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->apic_perf_irqs;
sum += irq_stats(cpu)->apic_pending_irqs;
#endif
- if (generic_interrupt_extension)
- sum += irq_stats(cpu)->generic_irqs;
+ if (x86_platform_ipi_callback)
+ sum += irq_stats(cpu)->x86_platform_ipis;
#ifdef CONFIG_SMP
sum += irq_stats(cpu)->irq_resched_count;
sum += irq_stats(cpu)->irq_call_count;
@@ -251,9 +251,9 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
}
/*
- * Handler for GENERIC_INTERRUPT_VECTOR.
+ * Handler for X86_PLATFORM_IPI_VECTOR.
*/
-void smp_generic_interrupt(struct pt_regs *regs)
+void smp_x86_platform_ipi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -263,10 +263,10 @@ void smp_generic_interrupt(struct pt_regs *regs)
irq_enter();
- inc_irq_stat(generic_irqs);
+ inc_irq_stat(x86_platform_ipis);
- if (generic_interrupt_extension)
- generic_interrupt_extension();
+ if (x86_platform_ipi_callback)
+ x86_platform_ipi_callback();
irq_exit();
@@ -294,12 +294,12 @@ void fixup_irqs(void)
continue;
/* interrupt's are disabled at this point */
- spin_lock(&desc->lock);
+ raw_spin_lock(&desc->lock);
affinity = desc->affinity;
if (!irq_has_action(irq) ||
cpumask_equal(affinity, cpu_online_mask)) {
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
continue;
}
@@ -326,7 +326,7 @@ void fixup_irqs(void)
if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask)
desc->chip->unmask(irq);
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
if (break_affinity && set_affinity)
printk("Broke affinity for irq %i\n", irq);
@@ -356,10 +356,10 @@ void fixup_irqs(void)
irq = __get_cpu_var(vector_irq)[vector];
desc = irq_to_desc(irq);
- spin_lock(&desc->lock);
+ raw_spin_lock(&desc->lock);
if (desc->chip->retrigger)
desc->chip->retrigger(irq);
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
}
}
}
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 40f30773fb29..d5932226614f 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -200,8 +200,8 @@ static void __init apic_intr_init(void)
/* self generated IPI for local APIC timer */
alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
- /* generic IPI for platform specific use */
- alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt);
+ /* IPI for X86 platform specific use */
+ alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 20a5b3689463..dd74fe7273b1 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -86,9 +86,15 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs[GDB_DS] = regs->ds;
gdb_regs[GDB_ES] = regs->es;
gdb_regs[GDB_CS] = regs->cs;
- gdb_regs[GDB_SS] = __KERNEL_DS;
gdb_regs[GDB_FS] = 0xFFFF;
gdb_regs[GDB_GS] = 0xFFFF;
+ if (user_mode_vm(regs)) {
+ gdb_regs[GDB_SS] = regs->ss;
+ gdb_regs[GDB_SP] = regs->sp;
+ } else {
+ gdb_regs[GDB_SS] = __KERNEL_DS;
+ gdb_regs[GDB_SP] = kernel_stack_pointer(regs);
+ }
#else
gdb_regs[GDB_R8] = regs->r8;
gdb_regs[GDB_R9] = regs->r9;
@@ -101,8 +107,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs32[GDB_PS] = regs->flags;
gdb_regs32[GDB_CS] = regs->cs;
gdb_regs32[GDB_SS] = regs->ss;
-#endif
gdb_regs[GDB_SP] = kernel_stack_pointer(regs);
+#endif
}
/**
@@ -220,8 +226,7 @@ static void kgdb_correct_hw_break(void)
dr7 |= ((breakinfo[breakno].len << 2) |
breakinfo[breakno].type) <<
((breakno << 2) + 16);
- if (breakno >= 0 && breakno <= 3)
- set_debugreg(breakinfo[breakno].addr, breakno);
+ set_debugreg(breakinfo[breakno].addr, breakno);
} else {
if ((dr7 & breakbit) && !breakinfo[breakno].enabled) {
@@ -395,7 +400,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
/* set the trace bit if we're stepping */
if (remcomInBuffer[0] == 's') {
linux_regs->flags |= X86_EFLAGS_TF;
- kgdb_single_step = 1;
atomic_set(&kgdb_cpu_doing_single_step,
raw_smp_processor_id());
}
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 1f3186ce213c..5b8c7505b3bc 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -481,7 +481,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
/*
* Interrupts are disabled on entry as trap3 is an interrupt gate and they
- * remain disabled thorough out this function.
+ * remain disabled throughout this function.
*/
static int __kprobes kprobe_handler(struct pt_regs *regs)
{
@@ -818,7 +818,7 @@ no_change:
/*
* Interrupts are disabled on entry as trap1 is an interrupt gate and they
- * remain disabled thoroughout this function.
+ * remain disabled throughout this function.
*/
static int __kprobes post_kprobe_handler(struct pt_regs *regs)
{
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index c843f8406da2..a3fa43ba5d3b 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -158,8 +158,7 @@ int machine_kexec_prepare(struct kimage *image)
{
int error;
- if (nx_enabled)
- set_pages_x(image->control_code_page, 1);
+ set_pages_x(image->control_code_page, 1);
error = machine_kexec_alloc_page_tables(image);
if (error)
return error;
@@ -173,8 +172,7 @@ int machine_kexec_prepare(struct kimage *image)
*/
void machine_kexec_cleanup(struct kimage *image)
{
- if (nx_enabled)
- set_pages_nx(image->control_code_page, 1);
+ set_pages_nx(image->control_code_page, 1);
machine_kexec_free_page_tables(image);
}
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
deleted file mode 100644
index 2a62d843f015..000000000000
--- a/arch/x86/kernel/mfgpt_32.c
+++ /dev/null
@@ -1,410 +0,0 @@
-/*
- * Driver/API for AMD Geode Multi-Function General Purpose Timers (MFGPT)
- *
- * Copyright (C) 2006, Advanced Micro Devices, Inc.
- * Copyright (C) 2007, Andres Salomon <dilinger@debian.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public License
- * as published by the Free Software Foundation.
- *
- * The MFGPTs are documented in AMD Geode CS5536 Companion Device Data Book.
- */
-
-/*
- * We are using the 32.768kHz input clock - it's the only one that has the
- * ranges we find desirable. The following table lists the suitable
- * divisors and the associated Hz, minimum interval and the maximum interval:
- *
- * Divisor Hz Min Delta (s) Max Delta (s)
- * 1 32768 .00048828125 2.000
- * 2 16384 .0009765625 4.000
- * 4 8192 .001953125 8.000
- * 8 4096 .00390625 16.000
- * 16 2048 .0078125 32.000
- * 32 1024 .015625 64.000
- * 64 512 .03125 128.000
- * 128 256 .0625 256.000
- * 256 128 .125 512.000
- */
-
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <asm/geode.h>
-
-#define MFGPT_DEFAULT_IRQ 7
-
-static struct mfgpt_timer_t {
- unsigned int avail:1;
-} mfgpt_timers[MFGPT_MAX_TIMERS];
-
-/* Selected from the table above */
-
-#define MFGPT_DIVISOR 16
-#define MFGPT_SCALE 4 /* divisor = 2^(scale) */
-#define MFGPT_HZ (32768 / MFGPT_DIVISOR)
-#define MFGPT_PERIODIC (MFGPT_HZ / HZ)
-
-/* Allow for disabling of MFGPTs */
-static int disable;
-static int __init mfgpt_disable(char *s)
-{
- disable = 1;
- return 1;
-}
-__setup("nomfgpt", mfgpt_disable);
-
-/* Reset the MFGPT timers. This is required by some broken BIOSes which already
- * do the same and leave the system in an unstable state. TinyBIOS 0.98 is
- * affected at least (0.99 is OK with MFGPT workaround left to off).
- */
-static int __init mfgpt_fix(char *s)
-{
- u32 val, dummy;
-
- /* The following udocumented bit resets the MFGPT timers */
- val = 0xFF; dummy = 0;
- wrmsr(MSR_MFGPT_SETUP, val, dummy);
- return 1;
-}
-__setup("mfgptfix", mfgpt_fix);
-
-/*
- * Check whether any MFGPTs are available for the kernel to use. In most
- * cases, firmware that uses AMD's VSA code will claim all timers during
- * bootup; we certainly don't want to take them if they're already in use.
- * In other cases (such as with VSAless OpenFirmware), the system firmware
- * leaves timers available for us to use.
- */
-
-
-static int timers = -1;
-
-static void geode_mfgpt_detect(void)
-{
- int i;
- u16 val;
-
- timers = 0;
-
- if (disable) {
- printk(KERN_INFO "geode-mfgpt: MFGPT support is disabled\n");
- goto done;
- }
-
- if (!geode_get_dev_base(GEODE_DEV_MFGPT)) {
- printk(KERN_INFO "geode-mfgpt: MFGPT LBAR is not set up\n");
- goto done;
- }
-
- for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
- val = geode_mfgpt_read(i, MFGPT_REG_SETUP);
- if (!(val & MFGPT_SETUP_SETUP)) {
- mfgpt_timers[i].avail = 1;
- timers++;
- }
- }
-
-done:
- printk(KERN_INFO "geode-mfgpt: %d MFGPT timers available.\n", timers);
-}
-
-int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
-{
- u32 msr, mask, value, dummy;
- int shift = (cmp == MFGPT_CMP1) ? 0 : 8;
-
- if (timer < 0 || timer >= MFGPT_MAX_TIMERS)
- return -EIO;
-
- /*
- * The register maps for these are described in sections 6.17.1.x of
- * the AMD Geode CS5536 Companion Device Data Book.
- */
- switch (event) {
- case MFGPT_EVENT_RESET:
- /*
- * XXX: According to the docs, we cannot reset timers above
- * 6; that is, resets for 7 and 8 will be ignored. Is this
- * a problem? -dilinger
- */
- msr = MSR_MFGPT_NR;
- mask = 1 << (timer + 24);
- break;
-
- case MFGPT_EVENT_NMI:
- msr = MSR_MFGPT_NR;
- mask = 1 << (timer + shift);
- break;
-
- case MFGPT_EVENT_IRQ:
- msr = MSR_MFGPT_IRQ;
- mask = 1 << (timer + shift);
- break;
-
- default:
- return -EIO;
- }
-
- rdmsr(msr, value, dummy);
-
- if (enable)
- value |= mask;
- else
- value &= ~mask;
-
- wrmsr(msr, value, dummy);
- return 0;
-}
-EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event);
-
-int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable)
-{
- u32 zsel, lpc, dummy;
- int shift;
-
- if (timer < 0 || timer >= MFGPT_MAX_TIMERS)
- return -EIO;
-
- /*
- * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA
- * is using the same CMP of the timer's Siamese twin, the IRQ is set to
- * 2, and we mustn't use nor change it.
- * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the
- * IRQ of the 1st. This can only happen if forcing an IRQ, calling this
- * with *irq==0 is safe. Currently there _are_ no 2 drivers.
- */
- rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
- shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4;
- if (((zsel >> shift) & 0xF) == 2)
- return -EIO;
-
- /* Choose IRQ: if none supplied, keep IRQ already set or use default */
- if (!*irq)
- *irq = (zsel >> shift) & 0xF;
- if (!*irq)
- *irq = MFGPT_DEFAULT_IRQ;
-
- /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */
- if (*irq < 1 || *irq == 2 || *irq > 15)
- return -EIO;
- rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy);
- if (lpc & (1 << *irq))
- return -EIO;
-
- /* All chosen and checked - go for it */
- if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable))
- return -EIO;
- if (enable) {
- zsel = (zsel & ~(0xF << shift)) | (*irq << shift);
- wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
- }
-
- return 0;
-}
-
-static int mfgpt_get(int timer)
-{
- mfgpt_timers[timer].avail = 0;
- printk(KERN_INFO "geode-mfgpt: Registered timer %d\n", timer);
- return timer;
-}
-
-int geode_mfgpt_alloc_timer(int timer, int domain)
-{
- int i;
-
- if (timers == -1) {
- /* timers haven't been detected yet */
- geode_mfgpt_detect();
- }
-
- if (!timers)
- return -1;
-
- if (timer >= MFGPT_MAX_TIMERS)
- return -1;
-
- if (timer < 0) {
- /* Try to find an available timer */
- for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
- if (mfgpt_timers[i].avail)
- return mfgpt_get(i);
-
- if (i == 5 && domain == MFGPT_DOMAIN_WORKING)
- break;
- }
- } else {
- /* If they requested a specific timer, try to honor that */
- if (mfgpt_timers[timer].avail)
- return mfgpt_get(timer);
- }
-
- /* No timers available - too bad */
- return -1;
-}
-EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer);
-
-
-#ifdef CONFIG_GEODE_MFGPT_TIMER
-
-/*
- * The MFPGT timers on the CS5536 provide us with suitable timers to use
- * as clock event sources - not as good as a HPET or APIC, but certainly
- * better than the PIT. This isn't a general purpose MFGPT driver, but
- * a simplified one designed specifically to act as a clock event source.
- * For full details about the MFGPT, please consult the CS5536 data sheet.
- */
-
-#include <linux/clocksource.h>
-#include <linux/clockchips.h>
-
-static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN;
-static u16 mfgpt_event_clock;
-
-static int irq;
-static int __init mfgpt_setup(char *str)
-{
- get_option(&str, &irq);
- return 1;
-}
-__setup("mfgpt_irq=", mfgpt_setup);
-
-static void mfgpt_disable_timer(u16 clock)
-{
- /* avoid races by clearing CMP1 and CMP2 unconditionally */
- geode_mfgpt_write(clock, MFGPT_REG_SETUP, (u16) ~MFGPT_SETUP_CNTEN |
- MFGPT_SETUP_CMP1 | MFGPT_SETUP_CMP2);
-}
-
-static int mfgpt_next_event(unsigned long, struct clock_event_device *);
-static void mfgpt_set_mode(enum clock_event_mode, struct clock_event_device *);
-
-static struct clock_event_device mfgpt_clockevent = {
- .name = "mfgpt-timer",
- .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
- .set_mode = mfgpt_set_mode,
- .set_next_event = mfgpt_next_event,
- .rating = 250,
- .cpumask = cpu_all_mask,
- .shift = 32
-};
-
-static void mfgpt_start_timer(u16 delta)
-{
- geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_CMP2, (u16) delta);
- geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0);
-
- geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP,
- MFGPT_SETUP_CNTEN | MFGPT_SETUP_CMP2);
-}
-
-static void mfgpt_set_mode(enum clock_event_mode mode,
- struct clock_event_device *evt)
-{
- mfgpt_disable_timer(mfgpt_event_clock);
-
- if (mode == CLOCK_EVT_MODE_PERIODIC)
- mfgpt_start_timer(MFGPT_PERIODIC);
-
- mfgpt_tick_mode = mode;
-}
-
-static int mfgpt_next_event(unsigned long delta, struct clock_event_device *evt)
-{
- mfgpt_start_timer(delta);
- return 0;
-}
-
-static irqreturn_t mfgpt_tick(int irq, void *dev_id)
-{
- u16 val = geode_mfgpt_read(mfgpt_event_clock, MFGPT_REG_SETUP);
-
- /* See if the interrupt was for us */
- if (!(val & (MFGPT_SETUP_SETUP | MFGPT_SETUP_CMP2 | MFGPT_SETUP_CMP1)))
- return IRQ_NONE;
-
- /* Turn off the clock (and clear the event) */
- mfgpt_disable_timer(mfgpt_event_clock);
-
- if (mfgpt_tick_mode == CLOCK_EVT_MODE_SHUTDOWN)
- return IRQ_HANDLED;
-
- /* Clear the counter */
- geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0);
-
- /* Restart the clock in periodic mode */
-
- if (mfgpt_tick_mode == CLOCK_EVT_MODE_PERIODIC) {
- geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP,
- MFGPT_SETUP_CNTEN | MFGPT_SETUP_CMP2);
- }
-
- mfgpt_clockevent.event_handler(&mfgpt_clockevent);
- return IRQ_HANDLED;
-}
-
-static struct irqaction mfgptirq = {
- .handler = mfgpt_tick,
- .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER,
- .name = "mfgpt-timer"
-};
-
-int __init mfgpt_timer_setup(void)
-{
- int timer, ret;
- u16 val;
-
- timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING);
- if (timer < 0) {
- printk(KERN_ERR
- "mfgpt-timer: Could not allocate a MFPGT timer\n");
- return -ENODEV;
- }
-
- mfgpt_event_clock = timer;
-
- /* Set up the IRQ on the MFGPT side */
- if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) {
- printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq);
- return -EIO;
- }
-
- /* And register it with the kernel */
- ret = setup_irq(irq, &mfgptirq);
-
- if (ret) {
- printk(KERN_ERR
- "mfgpt-timer: Unable to set up the interrupt.\n");
- goto err;
- }
-
- /* Set the clock scale and enable the event mode for CMP2 */
- val = MFGPT_SCALE | (3 << 8);
-
- geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, val);
-
- /* Set up the clock event */
- mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC,
- mfgpt_clockevent.shift);
- mfgpt_clockevent.min_delta_ns = clockevent_delta2ns(0xF,
- &mfgpt_clockevent);
- mfgpt_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFE,
- &mfgpt_clockevent);
-
- printk(KERN_INFO
- "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n",
- timer, irq);
- clockevents_register_device(&mfgpt_clockevent);
-
- return 0;
-
-err:
- geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq);
- printk(KERN_ERR
- "mfgpt-timer: Unable to set up the MFGPT clock source\n");
- return -EIO;
-}
-
-#endif
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index f4c538b681ca..37542b67c57e 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -13,6 +13,9 @@
* Licensed under the terms of the GNU General Public
* License version 2. See file COPYING for details.
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/firmware.h>
#include <linux/pci_ids.h>
#include <linux/uaccess.h>
@@ -33,6 +36,9 @@ MODULE_LICENSE("GPL v2");
#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
#define UCODE_UCODE_TYPE 0x00000001
+const struct firmware *firmware;
+static int supported_cpu;
+
struct equiv_cpu_entry {
u32 installed_cpu;
u32 fixed_errata_mask;
@@ -71,17 +77,14 @@ static struct equiv_cpu_entry *equiv_cpu_table;
static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
{
- struct cpuinfo_x86 *c = &cpu_data(cpu);
u32 dummy;
- memset(csig, 0, sizeof(*csig));
- if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
- printk(KERN_WARNING "microcode: CPU%d: AMD CPU family 0x%x not "
- "supported\n", cpu, c->x86);
+ if (!supported_cpu)
return -1;
- }
+
+ memset(csig, 0, sizeof(*csig));
rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy);
- printk(KERN_INFO "microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev);
+ pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev);
return 0;
}
@@ -103,23 +106,16 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
i++;
}
- if (!equiv_cpu_id) {
- printk(KERN_WARNING "microcode: CPU%d: cpu revision "
- "not listed in equivalent cpu table\n", cpu);
+ if (!equiv_cpu_id)
return 0;
- }
- if (mc_header->processor_rev_id != equiv_cpu_id) {
- printk(KERN_ERR "microcode: CPU%d: patch mismatch "
- "(processor_rev_id: %x, equiv_cpu_id: %x)\n",
- cpu, mc_header->processor_rev_id, equiv_cpu_id);
+ if (mc_header->processor_rev_id != equiv_cpu_id)
return 0;
- }
/* ucode might be chipset specific -- currently we don't support this */
if (mc_header->nb_dev_id || mc_header->sb_dev_id) {
- printk(KERN_ERR "microcode: CPU%d: loading of chipset "
- "specific code not yet supported\n", cpu);
+ pr_err("CPU%d: loading of chipset specific code not yet supported\n",
+ cpu);
return 0;
}
@@ -148,14 +144,12 @@ static int apply_microcode_amd(int cpu)
/* check current patch id and patch's id for match */
if (rev != mc_amd->hdr.patch_id) {
- printk(KERN_ERR "microcode: CPU%d: update failed "
- "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id);
+ pr_err("CPU%d: update failed (for patch_level=0x%x)\n",
+ cpu, mc_amd->hdr.patch_id);
return -1;
}
- printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n",
- cpu, rev);
-
+ pr_info("CPU%d: updated (new patch_level=0x%x)\n", cpu, rev);
uci->cpu_sig.rev = rev;
return 0;
@@ -178,18 +172,14 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
return NULL;
if (section_hdr[0] != UCODE_UCODE_TYPE) {
- printk(KERN_ERR "microcode: error: invalid type field in "
- "container file section header\n");
+ pr_err("error: invalid type field in container file section header\n");
return NULL;
}
total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8));
- printk(KERN_DEBUG "microcode: size %u, total_size %u\n",
- size, total_size);
-
if (total_size > size || total_size > UCODE_MAX_SIZE) {
- printk(KERN_ERR "microcode: error: size mismatch\n");
+ pr_err("error: size mismatch\n");
return NULL;
}
@@ -218,15 +208,13 @@ static int install_equiv_cpu_table(const u8 *buf)
size = buf_pos[2];
if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
- printk(KERN_ERR "microcode: error: invalid type field in "
- "container file section header\n");
+ pr_err("error: invalid type field in container file section header\n");
return 0;
}
equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size);
if (!equiv_cpu_table) {
- printk(KERN_ERR "microcode: failed to allocate "
- "equivalent CPU table\n");
+ pr_err("failed to allocate equivalent CPU table\n");
return 0;
}
@@ -259,8 +247,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
offset = install_equiv_cpu_table(ucode_ptr);
if (!offset) {
- printk(KERN_ERR "microcode: failed to create "
- "equivalent cpu table\n");
+ pr_err("failed to create equivalent cpu table\n");
return UCODE_ERROR;
}
@@ -291,8 +278,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
if (!leftover) {
vfree(uci->mc);
uci->mc = new_mc;
- pr_debug("microcode: CPU%d found a matching microcode "
- "update with version 0x%x (current=0x%x)\n",
+ pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
cpu, new_rev, uci->cpu_sig.rev);
} else {
vfree(new_mc);
@@ -308,33 +294,26 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
static enum ucode_state request_microcode_fw(int cpu, struct device *device)
{
- const char *fw_name = "amd-ucode/microcode_amd.bin";
- const struct firmware *firmware;
enum ucode_state ret;
- if (request_firmware(&firmware, fw_name, device)) {
- printk(KERN_ERR "microcode: failed to load file %s\n", fw_name);
+ if (firmware == NULL)
return UCODE_NFOUND;
- }
if (*(u32 *)firmware->data != UCODE_MAGIC) {
- printk(KERN_ERR "microcode: invalid UCODE_MAGIC (0x%08x)\n",
+ pr_err("invalid UCODE_MAGIC (0x%08x)\n",
*(u32 *)firmware->data);
return UCODE_ERROR;
}
ret = generic_load_microcode(cpu, firmware->data, firmware->size);
- release_firmware(firmware);
-
return ret;
}
static enum ucode_state
request_microcode_user(int cpu, const void __user *buf, size_t size)
{
- printk(KERN_INFO "microcode: AMD microcode update via "
- "/dev/cpu/microcode not supported\n");
+ pr_info("AMD microcode update via /dev/cpu/microcode not supported\n");
return UCODE_ERROR;
}
@@ -346,7 +325,31 @@ static void microcode_fini_cpu_amd(int cpu)
uci->mc = NULL;
}
+void init_microcode_amd(struct device *device)
+{
+ const char *fw_name = "amd-ucode/microcode_amd.bin";
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ WARN_ON(c->x86_vendor != X86_VENDOR_AMD);
+
+ if (c->x86 < 0x10) {
+ pr_warning("AMD CPU family 0x%x not supported\n", c->x86);
+ return;
+ }
+ supported_cpu = 1;
+
+ if (request_firmware(&firmware, fw_name, device))
+ pr_err("failed to load file %s\n", fw_name);
+}
+
+void fini_microcode_amd(void)
+{
+ release_firmware(firmware);
+}
+
static struct microcode_ops microcode_amd_ops = {
+ .init = init_microcode_amd,
+ .fini = fini_microcode_amd,
.request_microcode_user = request_microcode_user,
.request_microcode_fw = request_microcode_fw,
.collect_cpu_info = collect_cpu_info_amd,
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 2bcad3926edb..844c02c65fcb 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -70,6 +70,9 @@
* Fix sigmatch() macro to handle old CPUs with pf == 0.
* Thanks to Stuart Swales for pointing out this bug.
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/platform_device.h>
#include <linux/miscdevice.h>
#include <linux/capability.h>
@@ -209,7 +212,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
ssize_t ret = -EINVAL;
if ((len >> PAGE_SHIFT) > totalram_pages) {
- pr_err("microcode: too much data (max %ld pages)\n", totalram_pages);
+ pr_err("too much data (max %ld pages)\n", totalram_pages);
return ret;
}
@@ -244,7 +247,7 @@ static int __init microcode_dev_init(void)
error = misc_register(&microcode_dev);
if (error) {
- pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR);
+ pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR);
return error;
}
@@ -359,7 +362,7 @@ static enum ucode_state microcode_resume_cpu(int cpu)
if (!uci->mc)
return UCODE_NFOUND;
- pr_debug("microcode: CPU%d updated upon resume\n", cpu);
+ pr_debug("CPU%d updated upon resume\n", cpu);
apply_microcode_on_target(cpu);
return UCODE_OK;
@@ -379,7 +382,7 @@ static enum ucode_state microcode_init_cpu(int cpu)
ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
if (ustate == UCODE_OK) {
- pr_debug("microcode: CPU%d updated upon init\n", cpu);
+ pr_debug("CPU%d updated upon init\n", cpu);
apply_microcode_on_target(cpu);
}
@@ -391,7 +394,7 @@ static enum ucode_state microcode_update_cpu(int cpu)
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
enum ucode_state ustate;
- if (uci->valid)
+ if (uci->valid && uci->mc)
ustate = microcode_resume_cpu(cpu);
else
ustate = microcode_init_cpu(cpu);
@@ -406,7 +409,7 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
if (!cpu_online(cpu))
return 0;
- pr_debug("microcode: CPU%d added\n", cpu);
+ pr_debug("CPU%d added\n", cpu);
err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
if (err)
@@ -425,7 +428,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev)
if (!cpu_online(cpu))
return 0;
- pr_debug("microcode: CPU%d removed\n", cpu);
+ pr_debug("CPU%d removed\n", cpu);
microcode_fini_cpu(cpu);
sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
return 0;
@@ -473,15 +476,15 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
microcode_update_cpu(cpu);
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
- pr_debug("microcode: CPU%d added\n", cpu);
+ pr_debug("CPU%d added\n", cpu);
if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
- pr_err("microcode: Failed to create group for CPU%d\n", cpu);
+ pr_err("Failed to create group for CPU%d\n", cpu);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
/* Suspend is in progress, only remove the interface */
sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
- pr_debug("microcode: CPU%d removed\n", cpu);
+ pr_debug("CPU%d removed\n", cpu);
break;
case CPU_DEAD:
case CPU_UP_CANCELED_FROZEN:
@@ -507,7 +510,7 @@ static int __init microcode_init(void)
microcode_ops = init_amd_microcode();
if (!microcode_ops) {
- pr_err("microcode: no support for this CPU vendor\n");
+ pr_err("no support for this CPU vendor\n");
return -ENODEV;
}
@@ -518,6 +521,9 @@ static int __init microcode_init(void)
return PTR_ERR(microcode_pdev);
}
+ if (microcode_ops->init)
+ microcode_ops->init(&microcode_pdev->dev);
+
get_online_cpus();
mutex_lock(&microcode_mutex);
@@ -538,8 +544,7 @@ static int __init microcode_init(void)
register_hotcpu_notifier(&mc_cpu_notifier);
pr_info("Microcode Update Driver: v" MICROCODE_VERSION
- " <tigran@aivazian.fsnet.co.uk>,"
- " Peter Oruba\n");
+ " <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n");
return 0;
}
@@ -561,6 +566,9 @@ static void __exit microcode_exit(void)
platform_device_unregister(microcode_pdev);
+ if (microcode_ops->fini)
+ microcode_ops->fini();
+
microcode_ops = NULL;
pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 0d334ddd0a96..ebd193e476ca 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -70,6 +70,9 @@
* Fix sigmatch() macro to handle old CPUs with pf == 0.
* Thanks to Stuart Swales for pointing out this bug.
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/firmware.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
@@ -146,8 +149,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
cpu_has(c, X86_FEATURE_IA64)) {
- printk(KERN_ERR "microcode: CPU%d not a capable Intel "
- "processor\n", cpu_num);
+ pr_err("CPU%d not a capable Intel processor\n", cpu_num);
return -1;
}
@@ -165,8 +167,8 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
- printk(KERN_INFO "microcode: CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n",
- cpu_num, csig->sig, csig->pf, csig->rev);
+ pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n",
+ cpu_num, csig->sig, csig->pf, csig->rev);
return 0;
}
@@ -194,28 +196,24 @@ static int microcode_sanity_check(void *mc)
data_size = get_datasize(mc_header);
if (data_size + MC_HEADER_SIZE > total_size) {
- printk(KERN_ERR "microcode: error! "
- "Bad data size in microcode data file\n");
+ pr_err("error! Bad data size in microcode data file\n");
return -EINVAL;
}
if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
- printk(KERN_ERR "microcode: error! "
- "Unknown microcode update format\n");
+ pr_err("error! Unknown microcode update format\n");
return -EINVAL;
}
ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
if (ext_table_size) {
if ((ext_table_size < EXT_HEADER_SIZE)
|| ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
- printk(KERN_ERR "microcode: error! "
- "Small exttable size in microcode data file\n");
+ pr_err("error! Small exttable size in microcode data file\n");
return -EINVAL;
}
ext_header = mc + MC_HEADER_SIZE + data_size;
if (ext_table_size != exttable_size(ext_header)) {
- printk(KERN_ERR "microcode: error! "
- "Bad exttable size in microcode data file\n");
+ pr_err("error! Bad exttable size in microcode data file\n");
return -EFAULT;
}
ext_sigcount = ext_header->count;
@@ -230,8 +228,7 @@ static int microcode_sanity_check(void *mc)
while (i--)
ext_table_sum += ext_tablep[i];
if (ext_table_sum) {
- printk(KERN_WARNING "microcode: aborting, "
- "bad extended signature table checksum\n");
+ pr_warning("aborting, bad extended signature table checksum\n");
return -EINVAL;
}
}
@@ -242,7 +239,7 @@ static int microcode_sanity_check(void *mc)
while (i--)
orig_sum += ((int *)mc)[i];
if (orig_sum) {
- printk(KERN_ERR "microcode: aborting, bad checksum\n");
+ pr_err("aborting, bad checksum\n");
return -EINVAL;
}
if (!ext_table_size)
@@ -255,7 +252,7 @@ static int microcode_sanity_check(void *mc)
- (mc_header->sig + mc_header->pf + mc_header->cksum)
+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
if (sum) {
- printk(KERN_ERR "microcode: aborting, bad checksum\n");
+ pr_err("aborting, bad checksum\n");
return -EINVAL;
}
}
@@ -327,13 +324,11 @@ static int apply_microcode(int cpu)
rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
if (val[1] != mc_intel->hdr.rev) {
- printk(KERN_ERR "microcode: CPU%d update "
- "to revision 0x%x failed\n",
- cpu_num, mc_intel->hdr.rev);
+ pr_err("CPU%d update to revision 0x%x failed\n",
+ cpu_num, mc_intel->hdr.rev);
return -1;
}
- printk(KERN_INFO "microcode: CPU%d updated to revision "
- "0x%x, date = %04x-%02x-%02x \n",
+ pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x \n",
cpu_num, val[1],
mc_intel->hdr.date & 0xffff,
mc_intel->hdr.date >> 24,
@@ -362,8 +357,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
mc_size = get_totalsize(&mc_header);
if (!mc_size || mc_size > leftover) {
- printk(KERN_ERR "microcode: error!"
- "Bad data in microcode data file\n");
+ pr_err("error! Bad data in microcode data file\n");
break;
}
@@ -405,9 +399,8 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
vfree(uci->mc);
uci->mc = (struct microcode_intel *)new_mc;
- pr_debug("microcode: CPU%d found a matching microcode update with"
- " version 0x%x (current=0x%x)\n",
- cpu, new_rev, uci->cpu_sig.rev);
+ pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
+ cpu, new_rev, uci->cpu_sig.rev);
out:
return state;
}
@@ -429,7 +422,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device)
c->x86, c->x86_model, c->x86_mask);
if (request_firmware(&firmware, name, device)) {
- pr_debug("microcode: data file %s load failed\n", name);
+ pr_debug("data file %s load failed\n", name);
return UCODE_NFOUND;
}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 5be95ef4ffec..40b54ceb68b5 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -667,36 +667,18 @@ void __init default_get_smp_config(unsigned int early)
*/
}
-static void __init smp_reserve_bootmem(struct mpf_intel *mpf)
+static void __init smp_reserve_memory(struct mpf_intel *mpf)
{
unsigned long size = get_mpc_size(mpf->physptr);
-#ifdef CONFIG_X86_32
- /*
- * We cannot access to MPC table to compute table size yet,
- * as only few megabytes from the bottom is mapped now.
- * PC-9800's MPC table places on the very last of physical
- * memory; so that simply reserving PAGE_SIZE from mpf->physptr
- * yields BUG() in reserve_bootmem.
- * also need to make sure physptr is below than max_low_pfn
- * we don't need reserve the area above max_low_pfn
- */
- unsigned long end = max_low_pfn * PAGE_SIZE;
- if (mpf->physptr < end) {
- if (mpf->physptr + size > end)
- size = end - mpf->physptr;
- reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
- }
-#else
- reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
-#endif
+ reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc");
}
-static int __init smp_scan_config(unsigned long base, unsigned long length,
- unsigned reserve)
+static int __init smp_scan_config(unsigned long base, unsigned long length)
{
unsigned int *bp = phys_to_virt(base);
struct mpf_intel *mpf;
+ unsigned long mem;
apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
bp, length);
@@ -717,12 +699,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
printk(KERN_INFO "found SMP MP-table at [%p] %llx\n",
mpf, (u64)virt_to_phys(mpf));
- if (!reserve)
- return 1;
- reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf),
- BOOTMEM_DEFAULT);
+ mem = virt_to_phys(mpf);
+ reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf");
if (mpf->physptr)
- smp_reserve_bootmem(mpf);
+ smp_reserve_memory(mpf);
return 1;
}
@@ -732,7 +712,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
return 0;
}
-void __init default_find_smp_config(unsigned int reserve)
+void __init default_find_smp_config(void)
{
unsigned int address;
@@ -744,9 +724,9 @@ void __init default_find_smp_config(unsigned int reserve)
* 2) Scan the top 1K of base RAM
* 3) Scan the 64K of bios
*/
- if (smp_scan_config(0x0, 0x400, reserve) ||
- smp_scan_config(639 * 0x400, 0x400, reserve) ||
- smp_scan_config(0xF0000, 0x10000, reserve))
+ if (smp_scan_config(0x0, 0x400) ||
+ smp_scan_config(639 * 0x400, 0x400) ||
+ smp_scan_config(0xF0000, 0x10000))
return;
/*
* If it is an SMP machine we should know now, unless the
@@ -767,7 +747,7 @@ void __init default_find_smp_config(unsigned int reserve)
address = get_bios_ebda();
if (address)
- smp_scan_config(address, 0x400, reserve);
+ smp_scan_config(address, 0x400);
}
#ifdef CONFIG_X86_IO_APIC
@@ -965,9 +945,6 @@ void __init early_reserve_e820_mpc_new(void)
{
if (enable_update_mptable && alloc_mptable) {
u64 startt = 0;
-#ifdef CONFIG_X86_TRAMPOLINE
- startt = TRAMPOLINE_BASE;
-#endif
mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
}
}
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 553449951b84..4bd93c9b2b27 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -172,11 +172,10 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
static int msr_open(struct inode *inode, struct file *file)
{
- unsigned int cpu = iminor(file->f_path.dentry->d_inode);
- struct cpuinfo_x86 *c = &cpu_data(cpu);
+ unsigned int cpu;
+ struct cpuinfo_x86 *c;
cpu = iminor(file->f_path.dentry->d_inode);
-
if (cpu >= nr_cpu_ids || !cpu_online(cpu))
return -ENXIO; /* No such CPU */
@@ -247,7 +246,7 @@ static int __init msr_init(void)
int i, err = 0;
i = 0;
- if (register_chrdev(MSR_MAJOR, "cpu/msr", &msr_fops)) {
+ if (__register_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr", &msr_fops)) {
printk(KERN_ERR "msr: unable to get major %d for msr\n",
MSR_MAJOR);
err = -EBUSY;
@@ -275,7 +274,7 @@ out_class:
msr_device_destroy(i);
class_destroy(msr_class);
out_chrdev:
- unregister_chrdev(MSR_MAJOR, "cpu/msr");
+ __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
out:
return err;
}
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 4006c522adc7..9d1d263f786f 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -212,7 +212,7 @@ static int __init olpc_init(void)
unsigned char *romsig;
/* The ioremap check is dangerous; limit what we run it on */
- if (!is_geode() || geode_has_vsa2())
+ if (!is_geode() || cs5535_has_vsa2())
return 0;
spin_lock_init(&ec_lock);
@@ -244,7 +244,7 @@ static int __init olpc_init(void)
(unsigned char *) &olpc_platform_info.ecver, 1);
/* check to see if the VSA exists */
- if (geode_has_vsa2())
+ if (cs5535_has_vsa2())
olpc_platform_info.flags |= OLPC_F_VSA;
printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n",
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 3a7c5a44082e..676b8c77a976 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -8,9 +8,9 @@
#include <asm/paravirt.h>
static inline void
-default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
+default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
{
- __raw_spin_lock(lock);
+ arch_spin_lock(lock);
}
struct pv_lock_ops pv_lock_ops = {
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index c563e4c8ff39..2bbde6078143 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -31,7 +31,7 @@
#include <linux/string.h>
#include <linux/crash_dump.h>
#include <linux/dma-mapping.h>
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
#include <linux/pci_ids.h>
#include <linux/pci.h>
#include <linux/delay.h>
@@ -212,7 +212,7 @@ static void iommu_range_reserve(struct iommu_table *tbl,
spin_lock_irqsave(&tbl->it_lock, flags);
- iommu_area_reserve(tbl->it_map, index, npages);
+ bitmap_set(tbl->it_map, index, npages);
spin_unlock_irqrestore(&tbl->it_lock, flags);
}
@@ -303,7 +303,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
spin_lock_irqsave(&tbl->it_lock, flags);
- iommu_area_free(tbl->it_map, entry, npages);
+ bitmap_clear(tbl->it_map, entry, npages);
spin_unlock_irqrestore(&tbl->it_lock, flags);
}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index afcc58b69c7c..75e14e21f61a 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -124,8 +124,8 @@ void __init pci_iommu_alloc(void)
/* free the range so iommu could get some range less than 4G */
dma32_free_bootmem();
#endif
- if (pci_swiotlb_init())
- return;
+ if (pci_swiotlb_detect())
+ goto out;
gart_iommu_hole_init();
@@ -135,6 +135,8 @@ void __init pci_iommu_alloc(void)
/* needs to be called after gart_iommu_hole_init */
amd_iommu_detect();
+out:
+ pci_swiotlb_init();
}
void *dma_generic_alloc_coherent(struct device *dev, size_t size,
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index e6a0d402f171..34de53b46f87 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -23,7 +23,7 @@
#include <linux/module.h>
#include <linux/topology.h>
#include <linux/interrupt.h>
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
#include <linux/kdebug.h>
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
@@ -126,7 +126,7 @@ static void free_iommu(unsigned long offset, int size)
unsigned long flags;
spin_lock_irqsave(&iommu_bitmap_lock, flags);
- iommu_area_free(iommu_gart_bitmap, offset, size);
+ bitmap_clear(iommu_gart_bitmap, offset, size);
if (offset >= next_bit)
next_bit = offset + size;
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
@@ -710,7 +710,8 @@ static void gart_iommu_shutdown(void)
struct pci_dev *dev;
int i;
- if (no_agp)
+ /* don't shutdown it if there is AGP installed */
+ if (!no_agp)
return;
for (i = 0; i < num_k8_northbridges; i++) {
@@ -791,7 +792,7 @@ int __init gart_iommu_init(void)
* Out of IOMMU space handling.
* Reserve some invalid pages at the beginning of the GART.
*/
- iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
+ bitmap_set(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
iommu_size >> 20);
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index e3c0a66b9e77..7d2829dde20e 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -43,12 +43,12 @@ static struct dma_map_ops swiotlb_dma_ops = {
};
/*
- * pci_swiotlb_init - initialize swiotlb if necessary
+ * pci_swiotlb_detect - set swiotlb to 1 if necessary
*
* This returns non-zero if we are forced to use swiotlb (by the boot
* option).
*/
-int __init pci_swiotlb_init(void)
+int __init pci_swiotlb_detect(void)
{
int use_swiotlb = swiotlb | swiotlb_force;
@@ -60,10 +60,13 @@ int __init pci_swiotlb_init(void)
if (swiotlb_force)
swiotlb = 1;
+ return use_swiotlb;
+}
+
+void __init pci_swiotlb_init(void)
+{
if (swiotlb) {
swiotlb_init(0);
dma_ops = &swiotlb_dma_ops;
}
-
- return use_swiotlb;
}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 744508e7cfdd..98c2cdeb599e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -9,6 +9,9 @@
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <linux/random.h>
+#include <linux/user-return-notifier.h>
+#include <linux/dmi.h>
+#include <linux/utsname.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
#include <asm/system.h>
@@ -89,6 +92,25 @@ void exit_thread(void)
}
}
+void show_regs_common(void)
+{
+ const char *board, *product;
+
+ board = dmi_get_system_info(DMI_BOARD_NAME);
+ if (!board)
+ board = "";
+ product = dmi_get_system_info(DMI_PRODUCT_NAME);
+ if (!product)
+ product = "";
+
+ printk("\n");
+ printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n",
+ current->pid, current->comm, print_tainted(),
+ init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version, board, product);
+}
+
void flush_thread(void)
{
struct task_struct *tsk = current;
@@ -209,6 +231,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
*/
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
+ propagate_user_return_notify(prev_p, next_p);
}
int sys_fork(struct pt_regs *regs)
@@ -232,6 +255,76 @@ int sys_vfork(struct pt_regs *regs)
NULL, NULL);
}
+long
+sys_clone(unsigned long clone_flags, unsigned long newsp,
+ void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
+{
+ if (!newsp)
+ newsp = regs->sp;
+ return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
+}
+
+/*
+ * This gets run with %si containing the
+ * function to call, and %di containing
+ * the "args".
+ */
+extern void kernel_thread_helper(void);
+
+/*
+ * Create a kernel thread
+ */
+int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+ struct pt_regs regs;
+
+ memset(&regs, 0, sizeof(regs));
+
+ regs.si = (unsigned long) fn;
+ regs.di = (unsigned long) arg;
+
+#ifdef CONFIG_X86_32
+ regs.ds = __USER_DS;
+ regs.es = __USER_DS;
+ regs.fs = __KERNEL_PERCPU;
+ regs.gs = __KERNEL_STACK_CANARY;
+#endif
+
+ regs.orig_ax = -1;
+ regs.ip = (unsigned long) kernel_thread_helper;
+ regs.cs = __KERNEL_CS | get_kernel_rpl();
+ regs.flags = X86_EFLAGS_IF | 0x2;
+
+ /* Ok, create the new process.. */
+ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
+
+/*
+ * sys_execve() executes a new program.
+ */
+long sys_execve(char __user *name, char __user * __user *argv,
+ char __user * __user *envp, struct pt_regs *regs)
+{
+ long error;
+ char *filename;
+
+ filename = getname(name);
+ error = PTR_ERR(filename);
+ if (IS_ERR(filename))
+ return error;
+ error = do_execve(filename, argv, envp, regs);
+
+#ifdef CONFIG_X86_32
+ if (error == 0) {
+ /* Make sure we don't return using sysenter.. */
+ set_thread_flag(TIF_IRET);
+ }
+#endif
+
+ putname(filename);
+ return error;
+}
/*
* Idle related variables and functions
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 075580b35682..9c517b5858f0 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -23,7 +23,6 @@
#include <linux/vmalloc.h>
#include <linux/user.h>
#include <linux/interrupt.h>
-#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/init.h>
@@ -35,7 +34,6 @@
#include <linux/tick.h>
#include <linux/percpu.h>
#include <linux/prctl.h>
-#include <linux/dmi.h>
#include <linux/ftrace.h>
#include <linux/uaccess.h>
#include <linux/io.h>
@@ -128,7 +126,6 @@ void __show_regs(struct pt_regs *regs, int all)
unsigned long d0, d1, d2, d3, d6, d7;
unsigned long sp;
unsigned short ss, gs;
- const char *board;
if (user_mode_vm(regs)) {
sp = regs->sp;
@@ -140,16 +137,7 @@ void __show_regs(struct pt_regs *regs, int all)
savesegment(gs, gs);
}
- printk("\n");
-
- board = dmi_get_system_info(DMI_PRODUCT_NAME);
- if (!board)
- board = "";
- printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
- task_pid_nr(current), current->comm,
- print_tainted(), init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version, board);
+ show_regs_common();
printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
(u16)regs->cs, regs->ip, regs->flags,
@@ -192,39 +180,6 @@ void show_regs(struct pt_regs *regs)
show_trace(NULL, regs, &regs->sp, regs->bp);
}
-/*
- * This gets run with %bx containing the
- * function to call, and %dx containing
- * the "args".
- */
-extern void kernel_thread_helper(void);
-
-/*
- * Create a kernel thread
- */
-int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
- struct pt_regs regs;
-
- memset(&regs, 0, sizeof(regs));
-
- regs.bx = (unsigned long) fn;
- regs.dx = (unsigned long) arg;
-
- regs.ds = __USER_DS;
- regs.es = __USER_DS;
- regs.fs = __KERNEL_PERCPU;
- regs.gs = __KERNEL_STACK_CANARY;
- regs.orig_ax = -1;
- regs.ip = (unsigned long) kernel_thread_helper;
- regs.cs = __KERNEL_CS | get_kernel_rpl();
- regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
-
- /* Ok, create the new process.. */
- return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
void release_thread(struct task_struct *dead_task)
{
BUG_ON(dead_task->mm);
@@ -436,46 +391,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
return prev_p;
}
-int sys_clone(struct pt_regs *regs)
-{
- unsigned long clone_flags;
- unsigned long newsp;
- int __user *parent_tidptr, *child_tidptr;
-
- clone_flags = regs->bx;
- newsp = regs->cx;
- parent_tidptr = (int __user *)regs->dx;
- child_tidptr = (int __user *)regs->di;
- if (!newsp)
- newsp = regs->sp;
- return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
-}
-
-/*
- * sys_execve() executes a new program.
- */
-int sys_execve(struct pt_regs *regs)
-{
- int error;
- char *filename;
-
- filename = getname((char __user *) regs->bx);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- goto out;
- error = do_execve(filename,
- (char __user * __user *) regs->cx,
- (char __user * __user *) regs->dx,
- regs);
- if (error == 0) {
- /* Make sure we don't return using sysenter.. */
- set_thread_flag(TIF_IRET);
- }
- putname(filename);
-out:
- return error;
-}
-
#define top_esp (THREAD_SIZE - sizeof(unsigned long))
#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long))
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index a98fe88fab64..52fbd0c60198 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -26,7 +26,6 @@
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
-#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
@@ -38,7 +37,6 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
-#include <linux/dmi.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -59,8 +57,6 @@ asmlinkage extern void ret_from_fork(void);
DEFINE_PER_CPU(unsigned long, old_rsp);
static DEFINE_PER_CPU(unsigned char, is_idle);
-unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
-
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
void idle_notifier_register(struct notifier_block *n)
@@ -163,18 +159,8 @@ void __show_regs(struct pt_regs *regs, int all)
unsigned long d0, d1, d2, d3, d6, d7;
unsigned int fsindex, gsindex;
unsigned int ds, cs, es;
- const char *board;
-
- printk("\n");
- print_modules();
- board = dmi_get_system_info(DMI_PRODUCT_NAME);
- if (!board)
- board = "";
- printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
- current->pid, current->comm, print_tainted(),
- init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version, board);
+
+ show_regs_common();
printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
printk_address(regs->ip, 1);
printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
@@ -285,8 +271,9 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
*childregs = *regs;
childregs->ax = 0;
- childregs->sp = sp;
- if (sp == ~0UL)
+ if (user_mode(regs))
+ childregs->sp = sp;
+ else
childregs->sp = (unsigned long)childregs;
p->thread.sp = (unsigned long) childregs;
@@ -349,26 +336,42 @@ out:
return err;
}
-void
-start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
+static void
+start_thread_common(struct pt_regs *regs, unsigned long new_ip,
+ unsigned long new_sp,
+ unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
loadsegment(fs, 0);
- loadsegment(es, 0);
- loadsegment(ds, 0);
+ loadsegment(es, _ds);
+ loadsegment(ds, _ds);
load_gs_index(0);
regs->ip = new_ip;
regs->sp = new_sp;
percpu_write(old_rsp, new_sp);
- regs->cs = __USER_CS;
- regs->ss = __USER_DS;
- regs->flags = 0x200;
+ regs->cs = _cs;
+ regs->ss = _ss;
+ regs->flags = X86_EFLAGS_IF;
set_fs(USER_DS);
/*
* Free the old FP and other extended state
*/
free_thread_xstate(current);
}
-EXPORT_SYMBOL_GPL(start_thread);
+
+void
+start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
+{
+ start_thread_common(regs, new_ip, new_sp,
+ __USER_CS, __USER_DS, 0);
+}
+
+#ifdef CONFIG_IA32_EMULATION
+void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
+{
+ start_thread_common(regs, new_ip, new_sp,
+ __USER32_CS, __USER32_DS, __USER32_DS);
+}
+#endif
/*
* switch_to(x,y) should switch tasks from x to y.
@@ -504,25 +507,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
return prev_p;
}
-/*
- * sys_execve() executes a new program.
- */
-asmlinkage
-long sys_execve(char __user *name, char __user * __user *argv,
- char __user * __user *envp, struct pt_regs *regs)
-{
- long error;
- char *filename;
-
- filename = getname(name);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- return error;
- error = do_execve(filename, argv, envp, regs);
- putname(filename);
- return error;
-}
-
void set_personality_64bit(void)
{
/* inherit personality from parent */
@@ -537,15 +521,6 @@ void set_personality_64bit(void)
current->personality &= ~READ_IMPLIES_EXEC;
}
-asmlinkage long
-sys_clone(unsigned long clone_flags, unsigned long newsp,
- void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
-{
- if (!newsp)
- newsp = regs->sp;
- return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
-}
-
unsigned long get_wchan(struct task_struct *p)
{
unsigned long stack;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 04d182a7cfdb..017d937639fe 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -509,14 +509,14 @@ static int genregs_get(struct task_struct *target,
{
if (kbuf) {
unsigned long *k = kbuf;
- while (count > 0) {
+ while (count >= sizeof(*k)) {
*k++ = getreg(target, pos);
count -= sizeof(*k);
pos += sizeof(*k);
}
} else {
unsigned long __user *u = ubuf;
- while (count > 0) {
+ while (count >= sizeof(*u)) {
if (__put_user(getreg(target, pos), u++))
return -EFAULT;
count -= sizeof(*u);
@@ -535,14 +535,14 @@ static int genregs_set(struct task_struct *target,
int ret = 0;
if (kbuf) {
const unsigned long *k = kbuf;
- while (count > 0 && !ret) {
+ while (count >= sizeof(*k) && !ret) {
ret = putreg(target, pos, *k++);
count -= sizeof(*k);
pos += sizeof(*k);
}
} else {
const unsigned long __user *u = ubuf;
- while (count > 0 && !ret) {
+ while (count >= sizeof(*u) && !ret) {
unsigned long word;
ret = __get_user(word, u++);
if (ret)
@@ -555,7 +555,9 @@ static int genregs_set(struct task_struct *target,
return ret;
}
-static void ptrace_triggered(struct perf_event *bp, void *data)
+static void ptrace_triggered(struct perf_event *bp, int nmi,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
{
int i;
struct thread_struct *thread = &(current->thread);
@@ -593,13 +595,13 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[])
return dr7;
}
-static struct perf_event *
+static int
ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
struct task_struct *tsk, int disabled)
{
int err;
int gen_len, gen_type;
- DEFINE_BREAKPOINT_ATTR(attr);
+ struct perf_event_attr attr;
/*
* We shoud have at least an inactive breakpoint at this
@@ -607,18 +609,18 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
* written the address register first
*/
if (!bp)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
if (err)
- return ERR_PTR(err);
+ return err;
attr = bp->attr;
attr.bp_len = gen_len;
attr.bp_type = gen_type;
attr.disabled = disabled;
- return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
+ return modify_user_hw_breakpoint(bp, &attr);
}
/*
@@ -656,28 +658,17 @@ restore:
if (!second_pass)
continue;
- thread->ptrace_bps[i] = NULL;
- bp = ptrace_modify_breakpoint(bp, len, type,
+ rc = ptrace_modify_breakpoint(bp, len, type,
tsk, 1);
- if (IS_ERR(bp)) {
- rc = PTR_ERR(bp);
- thread->ptrace_bps[i] = NULL;
+ if (rc)
break;
- }
- thread->ptrace_bps[i] = bp;
}
continue;
}
- bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
-
- /* Incorrect bp, or we have a bug in bp API */
- if (IS_ERR(bp)) {
- rc = PTR_ERR(bp);
- thread->ptrace_bps[i] = NULL;
+ rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
+ if (rc)
break;
- }
- thread->ptrace_bps[i] = bp;
}
/*
* Make a second pass to free the remaining unused breakpoints
@@ -721,9 +712,10 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
{
struct perf_event *bp;
struct thread_struct *t = &tsk->thread;
- DEFINE_BREAKPOINT_ATTR(attr);
+ struct perf_event_attr attr;
if (!t->ptrace_bps[nr]) {
+ hw_breakpoint_init(&attr);
/*
* Put stub len and type to register (reserve) an inactive but
* correct bp
@@ -734,26 +726,32 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
attr.disabled = 1;
bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
+
+ /*
+ * CHECKME: the previous code returned -EIO if the addr wasn't
+ * a valid task virtual addr. The new one will return -EINVAL in
+ * this case.
+ * -EINVAL may be what we want for in-kernel breakpoints users,
+ * but -EIO looks better for ptrace, since we refuse a register
+ * writing for the user. And anyway this is the previous
+ * behaviour.
+ */
+ if (IS_ERR(bp))
+ return PTR_ERR(bp);
+
+ t->ptrace_bps[nr] = bp;
} else {
+ int err;
+
bp = t->ptrace_bps[nr];
- t->ptrace_bps[nr] = NULL;
attr = bp->attr;
attr.bp_addr = addr;
- bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk);
+ err = modify_user_hw_breakpoint(bp, &attr);
+ if (err)
+ return err;
}
- /*
- * CHECKME: the previous code returned -EIO if the addr wasn't a
- * valid task virtual addr. The new one will return -EINVAL in this
- * case.
- * -EINVAL may be what we want for in-kernel breakpoints users, but
- * -EIO looks better for ptrace, since we refuse a register writing
- * for the user. And anyway this is the previous behaviour.
- */
- if (IS_ERR(bp))
- return PTR_ERR(bp);
- t->ptrace_bps[nr] = bp;
return 0;
}
@@ -1460,14 +1458,14 @@ static int genregs32_get(struct task_struct *target,
{
if (kbuf) {
compat_ulong_t *k = kbuf;
- while (count > 0) {
+ while (count >= sizeof(*k)) {
getreg32(target, pos, k++);
count -= sizeof(*k);
pos += sizeof(*k);
}
} else {
compat_ulong_t __user *u = ubuf;
- while (count > 0) {
+ while (count >= sizeof(*u)) {
compat_ulong_t word;
getreg32(target, pos, &word);
if (__put_user(word, u++))
@@ -1488,14 +1486,14 @@ static int genregs32_set(struct task_struct *target,
int ret = 0;
if (kbuf) {
const compat_ulong_t *k = kbuf;
- while (count > 0 && !ret) {
+ while (count >= sizeof(*k) && !ret) {
ret = putreg32(target, pos, *k++);
count -= sizeof(*k);
pos += sizeof(*k);
}
} else {
const compat_ulong_t __user *u = ubuf;
- while (count > 0 && !ret) {
+ while (count >= sizeof(*u) && !ret) {
compat_ulong_t word;
ret = __get_user(word, u++);
if (ret)
@@ -1678,21 +1676,33 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
#endif
}
-void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
- int error_code, int si_code)
+static void fill_sigtrap_info(struct task_struct *tsk,
+ struct pt_regs *regs,
+ int error_code, int si_code,
+ struct siginfo *info)
{
- struct siginfo info;
-
tsk->thread.trap_no = 1;
tsk->thread.error_code = error_code;
- memset(&info, 0, sizeof(info));
- info.si_signo = SIGTRAP;
- info.si_code = si_code;
+ memset(info, 0, sizeof(*info));
+ info->si_signo = SIGTRAP;
+ info->si_code = si_code;
+ info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL;
+}
+
+void user_single_step_siginfo(struct task_struct *tsk,
+ struct pt_regs *regs,
+ struct siginfo *info)
+{
+ fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info);
+}
- /* User-mode ip? */
- info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL;
+void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
+ int error_code, int si_code)
+{
+ struct siginfo info;
+ fill_sigtrap_info(tsk, regs, error_code, si_code, &info);
/* Send us the fake SIGTRAP */
force_sig_info(SIGTRAP, &info, tsk);
}
@@ -1757,29 +1767,22 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
asmregparm void syscall_trace_leave(struct pt_regs *regs)
{
+ bool step;
+
if (unlikely(current->audit_context))
audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_exit(regs, regs->ax);
- if (test_thread_flag(TIF_SYSCALL_TRACE))
- tracehook_report_syscall_exit(regs, 0);
-
/*
* If TIF_SYSCALL_EMU is set, we only get here because of
* TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
* We already reported this syscall instruction in
- * syscall_trace_enter(), so don't do any more now.
- */
- if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
- return;
-
- /*
- * If we are single-stepping, synthesize a trap to follow the
- * system call instruction.
+ * syscall_trace_enter().
*/
- if (test_thread_flag(TIF_SINGLESTEP) &&
- tracehook_consider_fatal_signal(current, SIGTRAP))
- send_sigtrap(current, regs, 0, TRAP_BRKPT);
+ step = unlikely(test_thread_flag(TIF_SINGLESTEP)) &&
+ !test_thread_flag(TIF_SYSCALL_EMU);
+ if (step || test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall_exit(regs, step);
}
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 6c3b2c6fd772..18093d7498f0 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -499,6 +499,7 @@ static void __init quirk_amd_nb_node(struct pci_dev *dev)
{
struct pci_dev *nb_ht;
unsigned int devfn;
+ u32 node;
u32 val;
devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0);
@@ -507,7 +508,13 @@ static void __init quirk_amd_nb_node(struct pci_dev *dev)
return;
pci_read_config_dword(nb_ht, 0x60, &val);
- set_dev_node(&dev->dev, val & 7);
+ node = val & 7;
+ /*
+ * Some hardware may return an invalid node ID,
+ * so check it first:
+ */
+ if (node_online(node))
+ set_dev_node(&dev->dev, node);
pci_dev_put(nb_ht);
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2b97fc5b124e..1545bc0c9845 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -259,6 +259,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"),
},
},
+ { /* Handle problems with rebooting on ASUS P4S800 */
+ .callback = set_bios_reboot,
+ .ident = "ASUS P4S800",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
+ },
+ },
{ }
};
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index 61a837743fe5..fda313ebbb03 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -12,7 +12,7 @@
#include <linux/interrupt.h>
#include <asm/reboot_fixups.h>
#include <asm/msr.h>
-#include <asm/geode.h>
+#include <linux/cs5535.h>
static void cs5530a_warm_reset(struct pci_dev *dev)
{
@@ -80,6 +80,7 @@ void mach_reboot_fixups(void)
continue;
cur->reboot_fixup(dev);
+ pci_dev_put(dev);
}
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 82e88cdda9bc..f7b8b9894b22 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -73,6 +73,7 @@
#include <asm/mtrr.h>
#include <asm/apic.h>
+#include <asm/trampoline.h>
#include <asm/e820.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
@@ -106,6 +107,7 @@
#include <asm/percpu.h>
#include <asm/topology.h>
#include <asm/apicdef.h>
+#include <asm/k8.h>
#ifdef CONFIG_X86_64
#include <asm/numa_64.h>
#endif
@@ -487,42 +489,11 @@ static void __init reserve_early_setup_data(void)
#ifdef CONFIG_KEXEC
-/**
- * Reserve @size bytes of crashkernel memory at any suitable offset.
- *
- * @size: Size of the crashkernel memory to reserve.
- * Returns the base address on success, and -1ULL on failure.
- */
-static
-unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
-{
- const unsigned long long alignment = 16<<20; /* 16M */
- unsigned long long start = 0LL;
-
- while (1) {
- int ret;
-
- start = find_e820_area(start, ULONG_MAX, size, alignment);
- if (start == -1ULL)
- return start;
-
- /* try to reserve it */
- ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE);
- if (ret >= 0)
- return start;
-
- start += alignment;
- }
-}
-
static inline unsigned long long get_total_mem(void)
{
unsigned long long total;
- total = max_low_pfn - min_low_pfn;
-#ifdef CONFIG_HIGHMEM
- total += highend_pfn - highstart_pfn;
-#endif
+ total = max_pfn - min_low_pfn;
return total << PAGE_SHIFT;
}
@@ -542,21 +513,25 @@ static void __init reserve_crashkernel(void)
/* 0 means: find the address automatically */
if (crash_base <= 0) {
- crash_base = find_and_reserve_crashkernel(crash_size);
+ const unsigned long long alignment = 16<<20; /* 16M */
+
+ crash_base = find_e820_area(alignment, ULONG_MAX, crash_size,
+ alignment);
if (crash_base == -1ULL) {
- pr_info("crashkernel reservation failed. "
- "No suitable area found.\n");
+ pr_info("crashkernel reservation failed - No suitable area found.\n");
return;
}
} else {
- ret = reserve_bootmem_generic(crash_base, crash_size,
- BOOTMEM_EXCLUSIVE);
- if (ret < 0) {
- pr_info("crashkernel reservation failed - "
- "memory is in use\n");
+ unsigned long long start;
+
+ start = find_e820_area(crash_base, ULONG_MAX, crash_size,
+ 1<<20);
+ if (start != crash_base) {
+ pr_info("crashkernel reservation failed - memory is in use.\n");
return;
}
}
+ reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL");
printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
"for crashkernel (System RAM: %ldMB)\n",
@@ -699,6 +674,9 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
void __init setup_arch(char **cmdline_p)
{
+ int acpi = 0;
+ int k8 = 0;
+
#ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
visws_early_detect();
@@ -791,21 +769,18 @@ void __init setup_arch(char **cmdline_p)
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
-#ifdef CONFIG_X86_64
/*
- * Must call this twice: Once just to detect whether hardware doesn't
- * support NX (so that the early EHCI debug console setup can safely
- * call set_fixmap(), and then again after parsing early parameters to
- * honor the respective command line option.
+ * x86_configure_nx() is called before parse_early_param() to detect
+ * whether hardware doesn't support NX (so that the early EHCI debug
+ * console setup can safely call set_fixmap()). It may then be called
+ * again from within noexec_setup() during parsing early parameters
+ * to honor the respective command line option.
*/
- check_efer();
-#endif
+ x86_configure_nx();
parse_early_param();
-#ifdef CONFIG_X86_64
- check_efer();
-#endif
+ x86_report_nx();
/* Must be before kernel pagetables are setup */
vmi_activate();
@@ -901,6 +876,20 @@ void __init setup_arch(char **cmdline_p)
reserve_brk();
+ /*
+ * Find and reserve possible boot-time SMP configuration:
+ */
+ find_smp_config();
+
+ reserve_trampoline_memory();
+
+#ifdef CONFIG_ACPI_SLEEP
+ /*
+ * Reserve low memory region for sleep support.
+ * even before init_memory_mapping
+ */
+ acpi_reserve_wakeup_memory();
+#endif
init_gbpages();
/* max_pfn_mapped is updated here */
@@ -927,6 +916,8 @@ void __init setup_arch(char **cmdline_p)
reserve_initrd();
+ reserve_crashkernel();
+
vsmp_init();
io_delay_init();
@@ -942,23 +933,15 @@ void __init setup_arch(char **cmdline_p)
/*
* Parse SRAT to discover nodes.
*/
- acpi_numa_init();
+ acpi = acpi_numa_init();
#endif
- initmem_init(0, max_pfn);
-
-#ifdef CONFIG_ACPI_SLEEP
- /*
- * Reserve low memory region for sleep support.
- */
- acpi_reserve_bootmem();
+#ifdef CONFIG_K8_NUMA
+ if (!acpi)
+ k8 = !k8_numa_init(0, max_pfn);
#endif
- /*
- * Find and reserve possible boot-time SMP configuration:
- */
- find_smp_config();
- reserve_crashkernel();
+ initmem_init(0, max_pfn, acpi, k8);
#ifdef CONFIG_X86_64
/*
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index d559af913e1f..35abcb8b00e9 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -20,9 +22,9 @@
#include <asm/stackprotector.h>
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-# define DBG(x...) printk(KERN_DEBUG x)
+# define DBG(fmt, ...) pr_dbg(fmt, ##__VA_ARGS__)
#else
-# define DBG(x...)
+# define DBG(fmt, ...) do { if (0) pr_dbg(fmt, ##__VA_ARGS__); } while (0)
#endif
DEFINE_PER_CPU(int, cpu_number);
@@ -116,8 +118,8 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
} else {
ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node),
size, align, goal);
- pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
- "%016lx\n", cpu, size, node, __pa(ptr));
+ pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n",
+ cpu, size, node, __pa(ptr));
}
return ptr;
#else
@@ -198,8 +200,7 @@ void __init setup_per_cpu_areas(void)
pcpu_cpu_distance,
pcpu_fc_alloc, pcpu_fc_free);
if (rc < 0)
- pr_warning("PERCPU: %s allocator failed (%d), "
- "falling back to page size\n",
+ pr_warning("%s allocator failed (%d), falling back to page size\n",
pcpu_fc_names[pcpu_chosen_fc], rc);
}
if (rc < 0)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index fbf3b07c8567..4fd173cd8e57 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -19,6 +19,7 @@
#include <linux/stddef.h>
#include <linux/personality.h>
#include <linux/uaccess.h>
+#include <linux/user-return-notifier.h>
#include <asm/processor.h>
#include <asm/ucontext.h>
@@ -544,22 +545,12 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
}
#endif /* CONFIG_X86_32 */
-#ifdef CONFIG_X86_32
-int sys_sigaltstack(struct pt_regs *regs)
-{
- const stack_t __user *uss = (const stack_t __user *)regs->bx;
- stack_t __user *uoss = (stack_t __user *)regs->cx;
-
- return do_sigaltstack(uss, uoss, regs->sp);
-}
-#else /* !CONFIG_X86_32 */
-asmlinkage long
+long
sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
struct pt_regs *regs)
{
return do_sigaltstack(uss, uoss, regs->sp);
}
-#endif /* CONFIG_X86_32 */
/*
* Do a signal return; undo the signal stack.
@@ -863,6 +854,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (current->replacement_session_keyring)
key_replace_session_keyring();
}
+ if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
+ fire_user_return_notifiers();
#ifdef CONFIG_X86_32
clear_thread_flag(TIF_IRET);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 324f2a44c221..678d0b8c26f3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -671,6 +671,26 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
complete(&c_idle->done);
}
+/* reduce the number of lines printed when booting a large cpu count system */
+static void __cpuinit announce_cpu(int cpu, int apicid)
+{
+ static int current_node = -1;
+ int node = cpu_to_node(cpu);
+
+ if (system_state == SYSTEM_BOOTING) {
+ if (node != current_node) {
+ if (current_node > (-1))
+ pr_cont(" Ok.\n");
+ current_node = node;
+ pr_info("Booting Node %3d, Processors ", node);
+ }
+ pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : "");
+ return;
+ } else
+ pr_info("Booting Node %d Processor %d APIC 0x%x\n",
+ node, cpu, apicid);
+}
+
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -687,7 +707,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
};
- INIT_WORK(&c_idle.work, do_fork_idle);
+ INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle);
alternatives_smp_switch(1);
@@ -713,6 +733,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
if (IS_ERR(c_idle.idle)) {
printk("failed fork for CPU %d\n", cpu);
+ destroy_work_on_stack(&c_idle.work);
return PTR_ERR(c_idle.idle);
}
@@ -736,9 +757,8 @@ do_rest:
/* start_ip had better be page-aligned! */
start_ip = setup_trampoline();
- /* So we see what's up */
- printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n",
- cpu, apicid, start_ip);
+ /* So we see what's up */
+ announce_cpu(cpu, apicid);
/*
* This grunge runs the startup process for
@@ -787,21 +807,17 @@ do_rest:
udelay(100);
}
- if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
- /* number CPUs logically, starting from 1 (BSP is 0) */
- pr_debug("OK.\n");
- printk(KERN_INFO "CPU%d: ", cpu);
- print_cpu_info(&cpu_data(cpu));
- pr_debug("CPU has booted.\n");
- } else {
+ if (cpumask_test_cpu(cpu, cpu_callin_mask))
+ pr_debug("CPU%d: has booted.\n", cpu);
+ else {
boot_error = 1;
if (*((volatile unsigned char *)trampoline_base)
== 0xA5)
/* trampoline started but...? */
- printk(KERN_ERR "Stuck ??\n");
+ pr_err("CPU%d: Stuck ??\n", cpu);
else
/* trampoline code not run */
- printk(KERN_ERR "Not responding.\n");
+ pr_err("CPU%d: Not responding.\n", cpu);
if (apic->inquire_remote_apic)
apic->inquire_remote_apic(apicid);
}
@@ -831,6 +847,7 @@ do_rest:
smpboot_restore_warm_reset_vector();
}
+ destroy_work_on_stack(&c_idle.work);
return boot_error;
}
@@ -1291,14 +1308,16 @@ void native_cpu_die(unsigned int cpu)
for (i = 0; i < 10; i++) {
/* They ack this in play_dead by setting CPU_DEAD */
if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
- printk(KERN_INFO "CPU %d is now offline\n", cpu);
+ if (system_state == SYSTEM_RUNNING)
+ pr_info("CPU %u is now offline\n", cpu);
+
if (1 == num_online_cpus())
alternatives_smp_switch(0);
return;
}
msleep(100);
}
- printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+ pr_err("CPU %u didn't die...\n", cpu);
}
void play_dead_common(void)
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index c3eb207181fe..922eefbb3f6c 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -53,17 +53,19 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable)
}
static const struct stacktrace_ops save_stack_ops = {
- .warning = save_stack_warning,
- .warning_symbol = save_stack_warning_symbol,
- .stack = save_stack_stack,
- .address = save_stack_address,
+ .warning = save_stack_warning,
+ .warning_symbol = save_stack_warning_symbol,
+ .stack = save_stack_stack,
+ .address = save_stack_address,
+ .walk_stack = print_context_stack,
};
static const struct stacktrace_ops save_stack_ops_nosched = {
- .warning = save_stack_warning,
- .warning_symbol = save_stack_warning_symbol,
- .stack = save_stack_stack,
- .address = save_stack_address_nosched,
+ .warning = save_stack_warning,
+ .warning_symbol = save_stack_warning_symbol,
+ .stack = save_stack_stack,
+ .address = save_stack_address_nosched,
+ .walk_stack = print_context_stack,
};
/*
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index 1884a8d12bfa..dee1ff7cba58 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -24,31 +24,6 @@
#include <asm/syscalls.h>
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff)
-{
- int error = -EBADF;
- struct file *file = NULL;
- struct mm_struct *mm = current->mm;
-
- flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
- if (!(flags & MAP_ANONYMOUS)) {
- file = fget(fd);
- if (!file)
- goto out;
- }
-
- down_write(&mm->mmap_sem);
- error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
- up_write(&mm->mmap_sem);
-
- if (file)
- fput(file);
-out:
- return error;
-}
-
/*
* Perform the select(nd, in, out, ex, tv) and mmap() system
* calls. Linux/i386 didn't use to be able to handle more than
@@ -77,7 +52,7 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg)
if (a.offset & ~PAGE_MASK)
goto out;
- err = sys_mmap2(a.addr, a.len, a.prot, a.flags,
+ err = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags,
a.fd, a.offset >> PAGE_SHIFT);
out:
return err;
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 45e00eb09c3a..8aa2057efd12 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -23,26 +23,11 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, fd, unsigned long, off)
{
long error;
- struct file *file;
-
error = -EINVAL;
if (off & ~PAGE_MASK)
goto out;
- error = -EBADF;
- file = NULL;
- flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
- if (!(flags & MAP_ANONYMOUS)) {
- file = fget(fd);
- if (!file)
- goto out;
- }
- down_write(&current->mm->mmap_sem);
- error = do_mmap_pgoff(file, addr, len, prot, flags, off >> PAGE_SHIFT);
- up_write(&current->mm->mmap_sem);
-
- if (file)
- fput(file);
+ error = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
out:
return error;
}
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 0157cd26d7cc..15228b5d3eb7 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -191,7 +191,7 @@ ENTRY(sys_call_table)
.long sys_ni_syscall /* reserved for streams2 */
.long ptregs_vfork /* 190 */
.long sys_getrlimit
- .long sys_mmap2
+ .long sys_mmap_pgoff
.long sys_truncate64
.long sys_ftruncate64
.long sys_stat64 /* 195 */
@@ -336,3 +336,4 @@ ENTRY(sys_call_table)
.long sys_pwritev
.long sys_rt_tgsigqueueinfo /* 335 */
.long sys_perf_event_open
+ .long sys_recvmmsg
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 1740c85e24bb..364d015efebc 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -817,10 +817,8 @@ static int __init uv_init_blade(int blade)
*/
apicid = blade_to_first_apicid(blade);
pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
- if ((pa & 0xff) != UV_BAU_MESSAGE) {
- uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
+ uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
((apicid << 32) | UV_BAU_MESSAGE));
- }
return 0;
}
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index cd022121cab6..c652ef62742d 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -12,21 +12,19 @@
#endif
/* ready for x86_64 and x86 */
-unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE);
+unsigned char *__trampinitdata trampoline_base;
void __init reserve_trampoline_memory(void)
{
-#ifdef CONFIG_X86_32
- /*
- * But first pinch a few for the stack/trampoline stuff
- * FIXME: Don't need the extra page at 4K, but need to fix
- * trampoline before removing it. (see the GDT stuff)
- */
- reserve_early(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE");
-#endif
+ unsigned long mem;
+
/* Has to be in very low memory so we can execute real-mode AP code. */
- reserve_early(TRAMPOLINE_BASE, TRAMPOLINE_BASE + TRAMPOLINE_SIZE,
- "TRAMPOLINE");
+ mem = find_e820_area(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE);
+ if (mem == -1L)
+ panic("Cannot allocate trampoline\n");
+
+ trampoline_base = __va(mem);
+ reserve_early(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE");
}
/*
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index cd982f48e23e..597683aa5ba0 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -763,6 +763,7 @@ void mark_tsc_unstable(char *reason)
{
if (!tsc_unstable) {
tsc_unstable = 1;
+ sched_clock_stable = 0;
printk(KERN_INFO "Marking TSC unstable due to %s\n", reason);
/* Change only the rating, when not registered */
if (clocksource_tsc.mult)
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index eed156851f5d..0aa5fed8b9e6 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -33,7 +33,7 @@ static __cpuinitdata atomic_t stop_count;
* we want to have the fastest, inlined, non-debug version
* of a critical section, to be able to prove TSC time-warps:
*/
-static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static __cpuinitdata arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static __cpuinitdata cycles_t last_tsc;
static __cpuinitdata cycles_t max_warp;
@@ -62,13 +62,13 @@ static __cpuinit void check_tsc_warp(void)
* previous TSC that was measured (possibly on
* another CPU) and update the previous TSC timestamp.
*/
- __raw_spin_lock(&sync_lock);
+ arch_spin_lock(&sync_lock);
prev = last_tsc;
rdtsc_barrier();
now = get_cycles();
rdtsc_barrier();
last_tsc = now;
- __raw_spin_unlock(&sync_lock);
+ arch_spin_unlock(&sync_lock);
/*
* Be nice every now and then (and also check whether
@@ -87,10 +87,10 @@ static __cpuinit void check_tsc_warp(void)
* we saw a time-warp of the TSC going backwards:
*/
if (unlikely(prev > now)) {
- __raw_spin_lock(&sync_lock);
+ arch_spin_lock(&sync_lock);
max_warp = max(max_warp, prev - now);
nr_warps++;
- __raw_spin_unlock(&sync_lock);
+ arch_spin_unlock(&sync_lock);
}
}
WARN(!(now-start),
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index 61d805df4c91..ece73d8e3240 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -215,8 +215,7 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
unsigned long mmr_offset;
unsigned mmr_pnode;
- dest = set_desc_affinity(desc, mask);
- if (dest == BAD_APICID)
+ if (set_desc_affinity(desc, mask, &dest))
return -1;
mmr_value = 0;
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c
index 583f11d5c480..3c84aa001c11 100644
--- a/arch/x86/kernel/uv_time.c
+++ b/arch/x86/kernel/uv_time.c
@@ -74,7 +74,7 @@ struct uv_rtc_timer_head {
*/
static struct uv_rtc_timer_head **blade_info __read_mostly;
-static int uv_rtc_enable;
+static int uv_rtc_evt_enable;
/*
* Hardware interface routines
@@ -90,7 +90,7 @@ static void uv_rtc_send_IPI(int cpu)
pnode = uv_apicid_to_pnode(apicid);
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(apicid << UVH_IPI_INT_APIC_ID_SHFT) |
- (GENERIC_INTERRUPT_VECTOR << UVH_IPI_INT_VECTOR_SHFT);
+ (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT);
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
}
@@ -115,7 +115,7 @@ static int uv_setup_intr(int cpu, u64 expires)
uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS,
UVH_EVENT_OCCURRED0_RTC1_MASK);
- val = (GENERIC_INTERRUPT_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
+ val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
/* Set configuration */
@@ -123,7 +123,10 @@ static int uv_setup_intr(int cpu, u64 expires)
/* Initialize comparator value */
uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires);
- return (expires < uv_read_rtc(NULL) && !uv_intr_pending(pnode));
+ if (uv_read_rtc(NULL) <= expires)
+ return 0;
+
+ return !uv_intr_pending(pnode);
}
/*
@@ -223,6 +226,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires)
next_cpu = head->next_cpu;
*t = expires;
+
/* Will this one be next to go off? */
if (next_cpu < 0 || bcpu == next_cpu ||
expires < head->cpu[next_cpu].expires) {
@@ -231,7 +235,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires)
*t = ULLONG_MAX;
uv_rtc_find_next_timer(head, pnode);
spin_unlock_irqrestore(&head->lock, flags);
- return 1;
+ return -ETIME;
}
}
@@ -244,7 +248,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires)
*
* Returns 1 if this timer was pending.
*/
-static int uv_rtc_unset_timer(int cpu)
+static int uv_rtc_unset_timer(int cpu, int force)
{
int pnode = uv_cpu_to_pnode(cpu);
int bid = uv_cpu_to_blade_id(cpu);
@@ -256,14 +260,15 @@ static int uv_rtc_unset_timer(int cpu)
spin_lock_irqsave(&head->lock, flags);
- if (head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t)
+ if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force)
rc = 1;
- *t = ULLONG_MAX;
-
- /* Was the hardware setup for this timer? */
- if (head->next_cpu == bcpu)
- uv_rtc_find_next_timer(head, pnode);
+ if (rc) {
+ *t = ULLONG_MAX;
+ /* Was the hardware setup for this timer? */
+ if (head->next_cpu == bcpu)
+ uv_rtc_find_next_timer(head, pnode);
+ }
spin_unlock_irqrestore(&head->lock, flags);
@@ -310,32 +315,32 @@ static void uv_rtc_timer_setup(enum clock_event_mode mode,
break;
case CLOCK_EVT_MODE_UNUSED:
case CLOCK_EVT_MODE_SHUTDOWN:
- uv_rtc_unset_timer(ced_cpu);
+ uv_rtc_unset_timer(ced_cpu, 1);
break;
}
}
static void uv_rtc_interrupt(void)
{
- struct clock_event_device *ced = &__get_cpu_var(cpu_ced);
int cpu = smp_processor_id();
+ struct clock_event_device *ced = &per_cpu(cpu_ced, cpu);
if (!ced || !ced->event_handler)
return;
- if (uv_rtc_unset_timer(cpu) != 1)
+ if (uv_rtc_unset_timer(cpu, 0) != 1)
return;
ced->event_handler(ced);
}
-static int __init uv_enable_rtc(char *str)
+static int __init uv_enable_evt_rtc(char *str)
{
- uv_rtc_enable = 1;
+ uv_rtc_evt_enable = 1;
return 1;
}
-__setup("uvrtc", uv_enable_rtc);
+__setup("uvrtcevt", uv_enable_evt_rtc);
static __init void uv_rtc_register_clockevents(struct work_struct *dummy)
{
@@ -350,27 +355,32 @@ static __init int uv_rtc_setup_clock(void)
{
int rc;
- if (!uv_rtc_enable || !is_uv_system() || generic_interrupt_extension)
+ if (!is_uv_system())
return -ENODEV;
- generic_interrupt_extension = uv_rtc_interrupt;
-
clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second,
clocksource_uv.shift);
+ /* If single blade, prefer tsc */
+ if (uv_num_possible_blades() == 1)
+ clocksource_uv.rating = 250;
+
rc = clocksource_register(&clocksource_uv);
- if (rc) {
- generic_interrupt_extension = NULL;
+ if (rc)
+ printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc);
+ else
+ printk(KERN_INFO "UV RTC clocksource registered freq %lu MHz\n",
+ sn_rtc_cycles_per_second/(unsigned long)1E6);
+
+ if (rc || !uv_rtc_evt_enable || x86_platform_ipi_callback)
return rc;
- }
/* Setup and register clockevents */
rc = uv_rtc_allocate_timers();
- if (rc) {
- clocksource_unregister(&clocksource_uv);
- generic_interrupt_extension = NULL;
- return rc;
- }
+ if (rc)
+ goto error;
+
+ x86_platform_ipi_callback = uv_rtc_interrupt;
clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second,
NSEC_PER_SEC, clock_event_device_uv.shift);
@@ -383,11 +393,19 @@ static __init int uv_rtc_setup_clock(void)
rc = schedule_on_each_cpu(uv_rtc_register_clockevents);
if (rc) {
- clocksource_unregister(&clocksource_uv);
- generic_interrupt_extension = NULL;
+ x86_platform_ipi_callback = NULL;
uv_rtc_deallocate_timers();
+ goto error;
}
+ printk(KERN_INFO "UV RTC clockevents registered\n");
+
+ return 0;
+
+error:
+ clocksource_unregister(&clocksource_uv);
+ printk(KERN_INFO "UV RTC clockevents failed rc %d\n", rc);
+
return rc;
}
arch_initcall(uv_rtc_setup_clock);
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index abda6f53e71e..34a279a7471d 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -197,7 +197,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
apic_version[m->apicid] = ver;
}
-static void __init visws_find_smp_config(unsigned int reserve)
+static void __init visws_find_smp_config(void)
{
struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS);
unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 9c4e62539058..5ffb5622f793 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -197,9 +197,8 @@ out:
static int do_vm86_irq_handling(int subfunction, int irqnumber);
static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
-int sys_vm86old(struct pt_regs *regs)
+int sys_vm86old(struct vm86_struct __user *v86, struct pt_regs *regs)
{
- struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx;
struct kernel_vm86_struct info; /* declare this _on top_,
* this avoids wasting of stack space.
* This remains on the stack until we
@@ -227,7 +226,7 @@ out:
}
-int sys_vm86(struct pt_regs *regs)
+int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs)
{
struct kernel_vm86_struct info; /* declare this _on top_,
* this avoids wasting of stack space.
@@ -239,12 +238,12 @@ int sys_vm86(struct pt_regs *regs)
struct vm86plus_struct __user *v86;
tsk = current;
- switch (regs->bx) {
+ switch (cmd) {
case VM86_REQUEST_IRQ:
case VM86_FREE_IRQ:
case VM86_GET_IRQ_BITS:
case VM86_GET_AND_RESET_IRQ:
- ret = do_vm86_irq_handling(regs->bx, (int)regs->cx);
+ ret = do_vm86_irq_handling(cmd, (int)arg);
goto out;
case VM86_PLUS_INSTALL_CHECK:
/*
@@ -261,7 +260,7 @@ int sys_vm86(struct pt_regs *regs)
ret = -EPERM;
if (tsk->thread.saved_sp0)
goto out;
- v86 = (struct vm86plus_struct __user *)regs->cx;
+ v86 = (struct vm86plus_struct __user *)arg;
tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
offsetof(struct kernel_vm86_struct, regs32) -
sizeof(info.regs));
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 611b9e2360d3..74c92bb194df 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void)
evt->min_delta_ns = clockevent_delta2ns(1, evt);
evt->cpumask = cpumask_of(cpu);
- printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n",
+ printk(KERN_WARNING "vmi: registering clock event %s. mult=%u shift=%u\n",
evt->name, evt->mult, evt->shift);
clockevents_register_device(evt);
}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3c68fe2d46cf..f92a0da608cb 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -41,6 +41,32 @@ ENTRY(phys_startup_64)
jiffies_64 = jiffies;
#endif
+#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+/*
+ * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
+ * we retain large page mappings for boundaries spanning kernel text, rodata
+ * and data sections.
+ *
+ * However, kernel identity mappings will have different RWX permissions
+ * to the pages mapping to text and to the pages padding (which are freed) the
+ * text section. Hence kernel identity mappings will be broken to smaller
+ * pages. For 64-bit, kernel text and kernel identity mappings are different,
+ * so we can enable protection checks that come with CONFIG_DEBUG_RODATA,
+ * as well as retain 2MB large page mappings for kernel text.
+ */
+#define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE);
+
+#define X64_ALIGN_DEBUG_RODATA_END \
+ . = ALIGN(HPAGE_SIZE); \
+ __end_rodata_hpage_align = .;
+
+#else
+
+#define X64_ALIGN_DEBUG_RODATA_BEGIN
+#define X64_ALIGN_DEBUG_RODATA_END
+
+#endif
+
PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
data PT_LOAD FLAGS(7); /* RWE */
@@ -90,7 +116,9 @@ SECTIONS
EXCEPTION_TABLE(16) :text = 0x9090
+ X64_ALIGN_DEBUG_RODATA_BEGIN
RO_DATA(PAGE_SIZE)
+ X64_ALIGN_DEBUG_RODATA_END
/* Data */
.data : AT(ADDR(.data) - LOAD_OFFSET) {
@@ -107,13 +135,13 @@ SECTIONS
PAGE_ALIGNED_DATA(PAGE_SIZE)
- CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES)
+ CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
DATA_DATA
CONSTRUCTORS
/* rarely changed data like cpu maps */
- READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES)
+ READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES)
/* End of data section */
_edata = .;
@@ -137,12 +165,12 @@ SECTIONS
*(.vsyscall_0)
} :user
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+ . = ALIGN(L1_CACHE_BYTES);
.vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
*(.vsyscall_fn)
}
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+ . = ALIGN(L1_CACHE_BYTES);
.vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
*(.vsyscall_gtod_data)
}
@@ -166,7 +194,7 @@ SECTIONS
}
vgetcpu_mode = VVIRT(.vgetcpu_mode);
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+ . = ALIGN(L1_CACHE_BYTES);
.jiffies : AT(VLOAD(.jiffies)) {
*(.jiffies)
}
@@ -291,9 +319,7 @@ SECTIONS
__brk_limit = .;
}
- .end : AT(ADDR(.end) - LOAD_OFFSET) {
- _end = .;
- }
+ _end = .;
STABS_DEBUG
DWARF_DEBUG
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 8cb4974ff599..9055e5872ff0 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -73,7 +73,8 @@ void update_vsyscall_tz(void)
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
+ u32 mult)
{
unsigned long flags;
@@ -82,7 +83,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
vsyscall_gtod_data.clock.vread = clock->vread;
vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
vsyscall_gtod_data.clock.mask = clock->mask;
- vsyscall_gtod_data.clock.mult = clock->mult;
+ vsyscall_gtod_data.clock.mult = mult;
vsyscall_gtod_data.clock.shift = clock->shift;
vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
@@ -237,7 +238,7 @@ static ctl_table kernel_table2[] = {
};
static ctl_table kernel_root_table2[] = {
- { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
+ { .procname = "kernel", .mode = 0555,
.child = kernel_table2 },
{}
};
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index a1029769b6f2..619f7f88b8cc 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -17,8 +17,6 @@
EXPORT_SYMBOL(mcount);
#endif
-EXPORT_SYMBOL(kernel_thread);
-
EXPORT_SYMBOL(__get_user_1);
EXPORT_SYMBOL(__get_user_2);
EXPORT_SYMBOL(__get_user_4);
@@ -56,4 +54,6 @@ EXPORT_SYMBOL(__memcpy);
EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(init_level4_pgt);
-EXPORT_SYMBOL(load_gs_index);
+#ifndef CONFIG_PARAVIRT
+EXPORT_SYMBOL(native_load_gs_index);
+#endif
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index d11c5ff7c65e..ccd179dec36e 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -13,6 +13,7 @@
#include <asm/e820.h>
#include <asm/time.h>
#include <asm/irq.h>
+#include <asm/pat.h>
#include <asm/tsc.h>
#include <asm/iommu.h>
@@ -80,4 +81,5 @@ struct x86_platform_ops x86_platform = {
.get_wallclock = mach_get_cmos_time,
.set_wallclock = mach_set_rtc_mmss,
.iommu_shutdown = iommu_shutdown_noop,
+ .is_untracked_pat_range = is_ISA_range,
};
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b84e571f4175..4cd498332466 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -28,6 +28,7 @@ config KVM
select HAVE_KVM_IRQCHIP
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
+ select USER_RETURN_NOTIFIER
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 0e7fe78d0f74..31a7035c4bd9 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -6,7 +6,8 @@ CFLAGS_svm.o := -I.
CFLAGS_vmx.o := -I.
kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
- coalesced_mmio.o irq_comm.o eventfd.o)
+ coalesced_mmio.o irq_comm.o eventfd.o \
+ assigned-dev.o)
kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o)
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 1be5cd640e93..7e8faea4651e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -75,6 +75,8 @@
#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
#define GroupMask 0xff /* Group number stored in bits 0:7 */
+/* Misc flags */
+#define No64 (1<<28)
/* Source 2 operand type */
#define Src2None (0<<29)
#define Src2CL (1<<29)
@@ -92,19 +94,23 @@ static u32 opcode_table[256] = {
/* 0x00 - 0x07 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
- ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
+ ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
+ ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x08 - 0x0F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
- 0, 0, 0, 0,
+ ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
+ ImplicitOps | Stack | No64, 0,
/* 0x10 - 0x17 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
- ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
+ ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
+ ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x18 - 0x1F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
- ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
+ ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
+ ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x20 - 0x27 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -133,7 +139,8 @@ static u32 opcode_table[256] = {
DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
/* 0x60 - 0x67 */
- 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
+ ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
+ 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
0, 0, 0, 0,
/* 0x68 - 0x6F */
SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
@@ -158,7 +165,7 @@ static u32 opcode_table[256] = {
/* 0x90 - 0x97 */
DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
/* 0x98 - 0x9F */
- 0, 0, SrcImm | Src2Imm16, 0,
+ 0, 0, SrcImm | Src2Imm16 | No64, 0,
ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
/* 0xA0 - 0xA7 */
ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
@@ -185,7 +192,7 @@ static u32 opcode_table[256] = {
ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
/* 0xC8 - 0xCF */
0, 0, 0, ImplicitOps | Stack,
- ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps,
+ ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps,
/* 0xD0 - 0xD7 */
ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
@@ -198,7 +205,7 @@ static u32 opcode_table[256] = {
ByteOp | SrcImmUByte, SrcImmUByte,
/* 0xE8 - 0xEF */
SrcImm | Stack, SrcImm | ImplicitOps,
- SrcImmU | Src2Imm16, SrcImmByte | ImplicitOps,
+ SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
/* 0xF0 - 0xF7 */
@@ -244,11 +251,13 @@ static u32 twobyte_table[256] = {
/* 0x90 - 0x9F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xA0 - 0xA7 */
- 0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
+ ImplicitOps | Stack, ImplicitOps | Stack,
+ 0, DstMem | SrcReg | ModRM | BitOp,
DstMem | SrcReg | Src2ImmByte | ModRM,
DstMem | SrcReg | Src2CL | ModRM, 0, 0,
/* 0xA8 - 0xAF */
- 0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
+ ImplicitOps | Stack, ImplicitOps | Stack,
+ 0, DstMem | SrcReg | ModRM | BitOp,
DstMem | SrcReg | Src2ImmByte | ModRM,
DstMem | SrcReg | Src2CL | ModRM,
ModRM, 0,
@@ -613,6 +622,9 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
{
int rc = 0;
+ /* x86 instructions are limited to 15 bytes. */
+ if (eip + size - ctxt->decode.eip_orig > 15)
+ return X86EMUL_UNHANDLEABLE;
eip += ctxt->cs_base;
while (size--) {
rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
@@ -871,7 +883,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
/* Shadow copy of register state. Committed on successful emulation. */
memset(c, 0, sizeof(struct decode_cache));
- c->eip = kvm_rip_read(ctxt->vcpu);
+ c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu);
ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
@@ -962,6 +974,11 @@ done_prefixes:
}
}
+ if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
+ kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");;
+ return -1;
+ }
+
if (c->d & Group) {
group = c->d & GroupMask;
c->modrm = insn_fetch(u8, 1, c->eip);
@@ -1186,6 +1203,69 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
return rc;
}
+static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
+{
+ struct decode_cache *c = &ctxt->decode;
+ struct kvm_segment segment;
+
+ kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg);
+
+ c->src.val = segment.selector;
+ emulate_push(ctxt);
+}
+
+static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops, int seg)
+{
+ struct decode_cache *c = &ctxt->decode;
+ unsigned long selector;
+ int rc;
+
+ rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
+ if (rc != 0)
+ return rc;
+
+ rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg);
+ return rc;
+}
+
+static void emulate_pusha(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ unsigned long old_esp = c->regs[VCPU_REGS_RSP];
+ int reg = VCPU_REGS_RAX;
+
+ while (reg <= VCPU_REGS_RDI) {
+ (reg == VCPU_REGS_RSP) ?
+ (c->src.val = old_esp) : (c->src.val = c->regs[reg]);
+
+ emulate_push(ctxt);
+ ++reg;
+ }
+}
+
+static int emulate_popa(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
+{
+ struct decode_cache *c = &ctxt->decode;
+ int rc = 0;
+ int reg = VCPU_REGS_RDI;
+
+ while (reg >= VCPU_REGS_RAX) {
+ if (reg == VCPU_REGS_RSP) {
+ register_address_increment(c, &c->regs[VCPU_REGS_RSP],
+ c->op_bytes);
+ --reg;
+ }
+
+ rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
+ if (rc != 0)
+ break;
+ --reg;
+ }
+ return rc;
+}
+
static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops)
{
@@ -1707,18 +1787,45 @@ special_insn:
add: /* add */
emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
break;
+ case 0x06: /* push es */
+ emulate_push_sreg(ctxt, VCPU_SREG_ES);
+ break;
+ case 0x07: /* pop es */
+ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
+ if (rc != 0)
+ goto done;
+ break;
case 0x08 ... 0x0d:
or: /* or */
emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
break;
+ case 0x0e: /* push cs */
+ emulate_push_sreg(ctxt, VCPU_SREG_CS);
+ break;
case 0x10 ... 0x15:
adc: /* adc */
emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
break;
+ case 0x16: /* push ss */
+ emulate_push_sreg(ctxt, VCPU_SREG_SS);
+ break;
+ case 0x17: /* pop ss */
+ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
+ if (rc != 0)
+ goto done;
+ break;
case 0x18 ... 0x1d:
sbb: /* sbb */
emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
break;
+ case 0x1e: /* push ds */
+ emulate_push_sreg(ctxt, VCPU_SREG_DS);
+ break;
+ case 0x1f: /* pop ds */
+ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
+ if (rc != 0)
+ goto done;
+ break;
case 0x20 ... 0x25:
and: /* and */
emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
@@ -1750,6 +1857,14 @@ special_insn:
if (rc != 0)
goto done;
break;
+ case 0x60: /* pusha */
+ emulate_pusha(ctxt);
+ break;
+ case 0x61: /* popa */
+ rc = emulate_popa(ctxt, ops);
+ if (rc != 0)
+ goto done;
+ break;
case 0x63: /* movsxd */
if (ctxt->mode != X86EMUL_MODE_PROT64)
goto cannot_emulate;
@@ -1761,7 +1876,7 @@ special_insn:
break;
case 0x6c: /* insb */
case 0x6d: /* insw/insd */
- if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
+ if (kvm_emulate_pio_string(ctxt->vcpu,
1,
(c->d & ByteOp) ? 1 : c->op_bytes,
c->rep_prefix ?
@@ -1777,7 +1892,7 @@ special_insn:
return 0;
case 0x6e: /* outsb */
case 0x6f: /* outsw/outsd */
- if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
+ if (kvm_emulate_pio_string(ctxt->vcpu,
0,
(c->d & ByteOp) ? 1 : c->op_bytes,
c->rep_prefix ?
@@ -2070,7 +2185,7 @@ special_insn:
case 0xef: /* out (e/r)ax,dx */
port = c->regs[VCPU_REGS_RDX];
io_dir_in = 0;
- do_io: if (kvm_emulate_pio(ctxt->vcpu, NULL, io_dir_in,
+ do_io: if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
(c->d & ByteOp) ? 1 : c->op_bytes,
port) != 0) {
c->eip = saved_eip;
@@ -2297,6 +2412,14 @@ twobyte_insn:
jmp_rel(c, c->src.val);
c->dst.type = OP_NONE;
break;
+ case 0xa0: /* push fs */
+ emulate_push_sreg(ctxt, VCPU_SREG_FS);
+ break;
+ case 0xa1: /* pop fs */
+ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
+ if (rc != 0)
+ goto done;
+ break;
case 0xa3:
bt: /* bt */
c->dst.type = OP_NONE;
@@ -2308,6 +2431,14 @@ twobyte_insn:
case 0xa5: /* shld cl, r, r/m */
emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
break;
+ case 0xa8: /* push gs */
+ emulate_push_sreg(ctxt, VCPU_SREG_GS);
+ break;
+ case 0xa9: /* pop gs */
+ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
+ if (rc != 0)
+ goto done;
+ break;
case 0xab:
bts: /* bts */
/* only subword offset */
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 144e7f60b5e2..296aba49472a 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -29,6 +29,8 @@
* Based on QEMU and Xen.
*/
+#define pr_fmt(fmt) "pit: " fmt
+
#include <linux/kvm_host.h>
#include "irq.h"
@@ -262,7 +264,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
static void destroy_pit_timer(struct kvm_timer *pt)
{
- pr_debug("pit: execute del timer!\n");
+ pr_debug("execute del timer!\n");
hrtimer_cancel(&pt->timer);
}
@@ -284,7 +286,7 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
- pr_debug("pit: create pit timer, interval is %llu nsec\n", interval);
+ pr_debug("create pit timer, interval is %llu nsec\n", interval);
/* TODO The new value only affected after the retriggered */
hrtimer_cancel(&pt->timer);
@@ -309,7 +311,7 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
WARN_ON(!mutex_is_locked(&ps->lock));
- pr_debug("pit: load_count val is %d, channel is %d\n", val, channel);
+ pr_debug("load_count val is %d, channel is %d\n", val, channel);
/*
* The largest possible initial count is 0; this is equivalent
@@ -395,8 +397,8 @@ static int pit_ioport_write(struct kvm_io_device *this,
mutex_lock(&pit_state->lock);
if (val != 0)
- pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n",
- (unsigned int)addr, len, val);
+ pr_debug("write addr is 0x%x, len is %d, val is 0x%x\n",
+ (unsigned int)addr, len, val);
if (addr == 3) {
channel = val >> 6;
@@ -688,10 +690,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
struct kvm_vcpu *vcpu;
int i;
- mutex_lock(&kvm->irq_lock);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
- mutex_unlock(&kvm->irq_lock);
/*
* Provides NMI watchdog support via Virtual Wire mode.
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 01f151682802..d057c0cbd245 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -38,7 +38,15 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
s->isr_ack |= (1 << irq);
if (s != &s->pics_state->pics[0])
irq += 8;
+ /*
+ * We are dropping lock while calling ack notifiers since ack
+ * notifier callbacks for assigned devices call into PIC recursively.
+ * Other interrupt may be delivered to PIC while lock is dropped but
+ * it should be safe since PIC state is already updated at this stage.
+ */
+ spin_unlock(&s->pics_state->lock);
kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
+ spin_lock(&s->pics_state->lock);
}
void kvm_pic_clear_isr_ack(struct kvm *kvm)
@@ -176,16 +184,18 @@ int kvm_pic_set_irq(void *opaque, int irq, int level)
static inline void pic_intack(struct kvm_kpic_state *s, int irq)
{
s->isr |= 1 << irq;
- if (s->auto_eoi) {
- if (s->rotate_on_auto_eoi)
- s->priority_add = (irq + 1) & 7;
- pic_clear_isr(s, irq);
- }
/*
* We don't clear a level sensitive interrupt here
*/
if (!(s->elcr & (1 << irq)))
s->irr &= ~(1 << irq);
+
+ if (s->auto_eoi) {
+ if (s->rotate_on_auto_eoi)
+ s->priority_add = (irq + 1) & 7;
+ pic_clear_isr(s, irq);
+ }
+
}
int kvm_pic_read_irq(struct kvm *kvm)
@@ -225,22 +235,11 @@ int kvm_pic_read_irq(struct kvm *kvm)
void kvm_pic_reset(struct kvm_kpic_state *s)
{
- int irq, irqbase, n;
+ int irq;
struct kvm *kvm = s->pics_state->irq_request_opaque;
struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu;
+ u8 irr = s->irr, isr = s->imr;
- if (s == &s->pics_state->pics[0])
- irqbase = 0;
- else
- irqbase = 8;
-
- for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
- if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
- if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
- n = irq + irqbase;
- kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
- }
- }
s->last_irr = 0;
s->irr = 0;
s->imr = 0;
@@ -256,6 +255,13 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
s->rotate_on_auto_eoi = 0;
s->special_fully_nested_mode = 0;
s->init4 = 0;
+
+ for (irq = 0; irq < PIC_NUM_PINS/2; irq++) {
+ if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
+ if (irr & (1 << irq) || isr & (1 << irq)) {
+ pic_clear_isr(s, irq);
+ }
+ }
}
static void pic_ioport_write(void *opaque, u32 addr, u32 val)
@@ -298,9 +304,9 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
priority = get_priority(s, s->isr);
if (priority != 8) {
irq = (priority + s->priority_add) & 7;
- pic_clear_isr(s, irq);
if (cmd == 5)
s->priority_add = (irq + 1) & 7;
+ pic_clear_isr(s, irq);
pic_update_irq(s->pics_state);
}
break;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 7d6058a2fd38..be399e207d57 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -71,6 +71,7 @@ struct kvm_pic {
int output; /* intr from master PIC */
struct kvm_io_device dev;
void (*ack_notifier)(void *opaque, int irq);
+ unsigned long irq_states[16];
};
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
@@ -85,7 +86,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
static inline int irqchip_in_kernel(struct kvm *kvm)
{
- return pic_irqchip(kvm) != NULL;
+ int ret;
+
+ ret = (pic_irqchip(kvm) != NULL);
+ smp_rmb();
+ return ret;
}
void kvm_pic_reset(struct kvm_kpic_state *s);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 23c217692ea9..cd60c0bd1b32 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -32,7 +32,6 @@
#include <asm/current.h>
#include <asm/apicdef.h>
#include <asm/atomic.h>
-#include <asm/apicdef.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#include "trace.h"
@@ -471,11 +470,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
trigger_mode = IOAPIC_LEVEL_TRIG;
else
trigger_mode = IOAPIC_EDGE_TRIG;
- if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) {
- mutex_lock(&apic->vcpu->kvm->irq_lock);
+ if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
- mutex_unlock(&apic->vcpu->kvm->irq_lock);
- }
}
static void apic_send_ipi(struct kvm_lapic *apic)
@@ -504,9 +500,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
irq.vector);
- mutex_lock(&apic->vcpu->kvm->irq_lock);
kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
- mutex_unlock(&apic->vcpu->kvm->irq_lock);
}
static u32 apic_get_tmcct(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 818b92ad82cf..4c3e5b2314cb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2789,7 +2789,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
if (r)
goto out;
- er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0);
+ er = emulate_instruction(vcpu, cr2, error_code, 0);
switch (er) {
case EMULATE_DONE:
@@ -2800,6 +2800,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
case EMULATE_FAIL:
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
return 0;
default:
BUG();
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 72558f8ff3f5..a6017132fba8 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -467,7 +467,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
level = iterator.level;
sptep = iterator.sptep;
- /* FIXME: properly handle invlpg on large guest pages */
if (level == PT_PAGE_TABLE_LEVEL ||
((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c17404add91f..1d9b33843c80 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -46,6 +46,7 @@ MODULE_LICENSE("GPL");
#define SVM_FEATURE_NPT (1 << 0)
#define SVM_FEATURE_LBRV (1 << 1)
#define SVM_FEATURE_SVML (1 << 2)
+#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
#define NESTED_EXIT_HOST 0 /* Exit handled on host level */
#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
@@ -53,15 +54,6 @@ MODULE_LICENSE("GPL");
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
-/* Turn on to get debugging output*/
-/* #define NESTED_DEBUG */
-
-#ifdef NESTED_DEBUG
-#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
-#else
-#define nsvm_printk(fmt, args...) do {} while(0)
-#endif
-
static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
@@ -85,6 +77,9 @@ struct nested_state {
/* gpa pointers to the real vectors */
u64 vmcb_msrpm;
+ /* A VMEXIT is required but not yet emulated */
+ bool exit_required;
+
/* cache for intercepts of the guest */
u16 intercept_cr_read;
u16 intercept_cr_write;
@@ -112,6 +107,8 @@ struct vcpu_svm {
u32 *msrpm;
struct nested_state nested;
+
+ bool nmi_singlestep;
};
/* enable NPT for AMD64 and X86 with PAE */
@@ -286,7 +283,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
if (!svm->next_rip) {
- if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) !=
+ if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) !=
EMULATE_DONE)
printk(KERN_DEBUG "%s: NOP\n", __func__);
return;
@@ -316,75 +313,79 @@ static void svm_hardware_disable(void *garbage)
cpu_svm_disable();
}
-static void svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void *garbage)
{
- struct svm_cpu_data *svm_data;
+ struct svm_cpu_data *sd;
uint64_t efer;
struct descriptor_table gdt_descr;
struct desc_struct *gdt;
int me = raw_smp_processor_id();
+ rdmsrl(MSR_EFER, efer);
+ if (efer & EFER_SVME)
+ return -EBUSY;
+
if (!has_svm()) {
- printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me);
- return;
+ printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
+ me);
+ return -EINVAL;
}
- svm_data = per_cpu(svm_data, me);
+ sd = per_cpu(svm_data, me);
- if (!svm_data) {
- printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n",
+ if (!sd) {
+ printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
me);
- return;
+ return -EINVAL;
}
- svm_data->asid_generation = 1;
- svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
- svm_data->next_asid = svm_data->max_asid + 1;
+ sd->asid_generation = 1;
+ sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
+ sd->next_asid = sd->max_asid + 1;
kvm_get_gdt(&gdt_descr);
gdt = (struct desc_struct *)gdt_descr.base;
- svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
+ sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
- rdmsrl(MSR_EFER, efer);
wrmsrl(MSR_EFER, efer | EFER_SVME);
- wrmsrl(MSR_VM_HSAVE_PA,
- page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
+ wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
+
+ return 0;
}
static void svm_cpu_uninit(int cpu)
{
- struct svm_cpu_data *svm_data
- = per_cpu(svm_data, raw_smp_processor_id());
+ struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
- if (!svm_data)
+ if (!sd)
return;
per_cpu(svm_data, raw_smp_processor_id()) = NULL;
- __free_page(svm_data->save_area);
- kfree(svm_data);
+ __free_page(sd->save_area);
+ kfree(sd);
}
static int svm_cpu_init(int cpu)
{
- struct svm_cpu_data *svm_data;
+ struct svm_cpu_data *sd;
int r;
- svm_data = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
- if (!svm_data)
+ sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
+ if (!sd)
return -ENOMEM;
- svm_data->cpu = cpu;
- svm_data->save_area = alloc_page(GFP_KERNEL);
+ sd->cpu = cpu;
+ sd->save_area = alloc_page(GFP_KERNEL);
r = -ENOMEM;
- if (!svm_data->save_area)
+ if (!sd->save_area)
goto err_1;
- per_cpu(svm_data, cpu) = svm_data;
+ per_cpu(svm_data, cpu) = sd;
return 0;
err_1:
- kfree(svm_data);
+ kfree(sd);
return r;
}
@@ -476,7 +477,7 @@ static __init int svm_hardware_setup(void)
kvm_enable_efer_bits(EFER_SVME);
}
- for_each_online_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
r = svm_cpu_init(cpu);
if (r)
goto err;
@@ -510,7 +511,7 @@ static __exit void svm_hardware_unsetup(void)
{
int cpu;
- for_each_online_cpu(cpu)
+ for_each_possible_cpu(cpu)
svm_cpu_uninit(cpu);
__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
@@ -625,11 +626,12 @@ static void init_vmcb(struct vcpu_svm *svm)
save->rip = 0x0000fff0;
svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
- /*
- * cr0 val on cpu init should be 0x60000010, we enable cpu
- * cache by default. the orderly way is to enable cache in bios.
+ /* This is the guest-visible cr0 value.
+ * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
*/
- save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP;
+ svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
+ kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);
+
save->cr4 = X86_CR4_PAE;
/* rdx = ?? */
@@ -644,8 +646,6 @@ static void init_vmcb(struct vcpu_svm *svm)
control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
INTERCEPT_CR3_MASK);
save->g_pat = 0x0007040600070406ULL;
- /* enable caching because the QEMU Bios doesn't enable it */
- save->cr0 = X86_CR0_ET;
save->cr3 = 0;
save->cr4 = 0;
}
@@ -654,6 +654,11 @@ static void init_vmcb(struct vcpu_svm *svm)
svm->nested.vmcb = 0;
svm->vcpu.arch.hflags = 0;
+ if (svm_has(SVM_FEATURE_PAUSE_FILTER)) {
+ control->pause_filter_count = 3000;
+ control->intercept |= (1ULL << INTERCEPT_PAUSE);
+ }
+
enable_gif(svm);
}
@@ -758,14 +763,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
int i;
if (unlikely(cpu != vcpu->cpu)) {
- u64 tsc_this, delta;
+ u64 delta;
/*
* Make sure that the guest sees a monotonically
* increasing TSC.
*/
- rdtscll(tsc_this);
- delta = vcpu->arch.host_tsc - tsc_this;
+ delta = vcpu->arch.host_tsc - native_read_tsc();
svm->vmcb->control.tsc_offset += delta;
if (is_nested(svm))
svm->nested.hsave->control.tsc_offset += delta;
@@ -787,7 +791,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
- rdtscll(vcpu->arch.host_tsc);
+ vcpu->arch.host_tsc = native_read_tsc();
}
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
@@ -1045,7 +1049,7 @@ static void update_db_intercept(struct kvm_vcpu *vcpu)
svm->vmcb->control.intercept_exceptions &=
~((1 << DB_VECTOR) | (1 << BP_VECTOR));
- if (vcpu->arch.singlestep)
+ if (svm->nmi_singlestep)
svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
@@ -1060,26 +1064,16 @@ static void update_db_intercept(struct kvm_vcpu *vcpu)
vcpu->guest_debug = 0;
}
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
{
- int old_debug = vcpu->guest_debug;
struct vcpu_svm *svm = to_svm(vcpu);
- vcpu->guest_debug = dbg->control;
-
- update_db_intercept(vcpu);
-
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
else
svm->vmcb->save.dr7 = vcpu->arch.dr7;
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
- else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
- svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
-
- return 0;
+ update_db_intercept(vcpu);
}
static void load_host_msrs(struct kvm_vcpu *vcpu)
@@ -1096,16 +1090,16 @@ static void save_host_msrs(struct kvm_vcpu *vcpu)
#endif
}
-static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
+static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
{
- if (svm_data->next_asid > svm_data->max_asid) {
- ++svm_data->asid_generation;
- svm_data->next_asid = 1;
+ if (sd->next_asid > sd->max_asid) {
+ ++sd->asid_generation;
+ sd->next_asid = 1;
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
}
- svm->asid_generation = svm_data->asid_generation;
- svm->vmcb->control.asid = svm_data->next_asid++;
+ svm->asid_generation = sd->asid_generation;
+ svm->vmcb->control.asid = sd->next_asid++;
}
static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
@@ -1180,7 +1174,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
}
}
-static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int pf_interception(struct vcpu_svm *svm)
{
u64 fault_address;
u32 error_code;
@@ -1194,17 +1188,19 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
}
-static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int db_interception(struct vcpu_svm *svm)
{
+ struct kvm_run *kvm_run = svm->vcpu.run;
+
if (!(svm->vcpu.guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
- !svm->vcpu.arch.singlestep) {
+ !svm->nmi_singlestep) {
kvm_queue_exception(&svm->vcpu, DB_VECTOR);
return 1;
}
- if (svm->vcpu.arch.singlestep) {
- svm->vcpu.arch.singlestep = false;
+ if (svm->nmi_singlestep) {
+ svm->nmi_singlestep = false;
if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
svm->vmcb->save.rflags &=
~(X86_EFLAGS_TF | X86_EFLAGS_RF);
@@ -1223,25 +1219,27 @@ static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
-static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int bp_interception(struct vcpu_svm *svm)
{
+ struct kvm_run *kvm_run = svm->vcpu.run;
+
kvm_run->exit_reason = KVM_EXIT_DEBUG;
kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
kvm_run->debug.arch.exception = BP_VECTOR;
return 0;
}
-static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int ud_interception(struct vcpu_svm *svm)
{
int er;
- er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD);
+ er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD);
if (er != EMULATE_DONE)
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
}
-static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nm_interception(struct vcpu_svm *svm)
{
svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
@@ -1251,7 +1249,7 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
-static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int mc_interception(struct vcpu_svm *svm)
{
/*
* On an #MC intercept the MCE handler is not called automatically in
@@ -1264,8 +1262,10 @@ static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
-static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int shutdown_interception(struct vcpu_svm *svm)
{
+ struct kvm_run *kvm_run = svm->vcpu.run;
+
/*
* VMCB is undefined after a SHUTDOWN intercept
* so reinitialize it.
@@ -1277,7 +1277,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 0;
}
-static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int io_interception(struct vcpu_svm *svm)
{
u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
int size, in, string;
@@ -1291,7 +1291,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
if (string) {
if (emulate_instruction(&svm->vcpu,
- kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
+ 0, 0, 0) == EMULATE_DO_MMIO)
return 0;
return 1;
}
@@ -1301,33 +1301,33 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
skip_emulated_instruction(&svm->vcpu);
- return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
+ return kvm_emulate_pio(&svm->vcpu, in, size, port);
}
-static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nmi_interception(struct vcpu_svm *svm)
{
return 1;
}
-static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int intr_interception(struct vcpu_svm *svm)
{
++svm->vcpu.stat.irq_exits;
return 1;
}
-static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int nop_on_interception(struct vcpu_svm *svm)
{
return 1;
}
-static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int halt_interception(struct vcpu_svm *svm)
{
svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
skip_emulated_instruction(&svm->vcpu);
return kvm_emulate_halt(&svm->vcpu);
}
-static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmmcall_interception(struct vcpu_svm *svm)
{
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
skip_emulated_instruction(&svm->vcpu);
@@ -1378,8 +1378,15 @@ static inline int nested_svm_intr(struct vcpu_svm *svm)
svm->vmcb->control.exit_code = SVM_EXIT_INTR;
- if (nested_svm_exit_handled(svm)) {
- nsvm_printk("VMexit -> INTR\n");
+ if (svm->nested.intercept & 1ULL) {
+ /*
+ * The #vmexit can't be emulated here directly because this
+ * code path runs with irqs and preemtion disabled. A
+ * #vmexit emulation might sleep. Only signal request for
+ * the #vmexit here.
+ */
+ svm->nested.exit_required = true;
+ trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
return 1;
}
@@ -1390,10 +1397,7 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx)
{
struct page *page;
- down_read(&current->mm->mmap_sem);
page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
- up_read(&current->mm->mmap_sem);
-
if (is_error_page(page))
goto error;
@@ -1532,14 +1536,12 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm)
}
default: {
u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
- nsvm_printk("exit code: 0x%x\n", exit_code);
if (svm->nested.intercept & exit_bits)
vmexit = NESTED_EXIT_DONE;
}
}
if (vmexit == NESTED_EXIT_DONE) {
- nsvm_printk("#VMEXIT reason=%04x\n", exit_code);
nested_svm_vmexit(svm);
}
@@ -1584,6 +1586,12 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
struct vmcb *hsave = svm->nested.hsave;
struct vmcb *vmcb = svm->vmcb;
+ trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
+ vmcb->control.exit_info_1,
+ vmcb->control.exit_info_2,
+ vmcb->control.exit_int_info,
+ vmcb->control.exit_int_info_err);
+
nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0);
if (!nested_vmcb)
return 1;
@@ -1617,6 +1625,22 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
+
+ /*
+ * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
+ * to make sure that we do not lose injected events. So check event_inj
+ * here and copy it to exit_int_info if it is valid.
+ * Exit_int_info and event_inj can't be both valid because the case
+ * below only happens on a VMRUN instruction intercept which has
+ * no valid exit_int_info set.
+ */
+ if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
+ struct vmcb_control_area *nc = &nested_vmcb->control;
+
+ nc->exit_int_info = vmcb->control.event_inj;
+ nc->exit_int_info_err = vmcb->control.event_inj_err;
+ }
+
nested_vmcb->control.tlb_ctl = 0;
nested_vmcb->control.event_inj = 0;
nested_vmcb->control.event_inj_err = 0;
@@ -1628,10 +1652,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
/* Restore the original control entries */
copy_vmcb_control_area(vmcb, hsave);
- /* Kill any pending exceptions */
- if (svm->vcpu.arch.exception.pending == true)
- nsvm_printk("WARNING: Pending Exception\n");
-
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
@@ -1702,6 +1722,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
/* nested_vmcb is our indicator if nested SVM is activated */
svm->nested.vmcb = svm->vmcb->save.rax;
+ trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb,
+ nested_vmcb->save.rip,
+ nested_vmcb->control.int_ctl,
+ nested_vmcb->control.event_inj,
+ nested_vmcb->control.nested_ctl);
+
/* Clear internal status */
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
@@ -1789,28 +1815,15 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
svm->nested.intercept = nested_vmcb->control.intercept;
force_new_asid(&svm->vcpu);
- svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
- svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
- if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
- nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
- nested_vmcb->control.int_ctl);
- }
if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
svm->vcpu.arch.hflags |= HF_VINTR_MASK;
else
svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
- nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
- nested_vmcb->control.exit_int_info,
- nested_vmcb->control.int_state);
-
svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
svm->vmcb->control.int_state = nested_vmcb->control.int_state;
svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
- if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
- nsvm_printk("Injecting Event: 0x%x\n",
- nested_vmcb->control.event_inj);
svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
@@ -1837,7 +1850,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
}
-static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmload_interception(struct vcpu_svm *svm)
{
struct vmcb *nested_vmcb;
@@ -1857,7 +1870,7 @@ static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
-static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmsave_interception(struct vcpu_svm *svm)
{
struct vmcb *nested_vmcb;
@@ -1877,10 +1890,8 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
-static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int vmrun_interception(struct vcpu_svm *svm)
{
- nsvm_printk("VMrun\n");
-
if (nested_svm_check_permissions(svm))
return 1;
@@ -1907,7 +1918,7 @@ failed:
return 1;
}
-static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int stgi_interception(struct vcpu_svm *svm)
{
if (nested_svm_check_permissions(svm))
return 1;
@@ -1920,7 +1931,7 @@ static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
-static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int clgi_interception(struct vcpu_svm *svm)
{
if (nested_svm_check_permissions(svm))
return 1;
@@ -1937,10 +1948,12 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
-static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int invlpga_interception(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- nsvm_printk("INVLPGA\n");
+
+ trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
+ vcpu->arch.regs[VCPU_REGS_RAX]);
/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
@@ -1950,15 +1963,21 @@ static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
-static int invalid_op_interception(struct vcpu_svm *svm,
- struct kvm_run *kvm_run)
+static int skinit_interception(struct vcpu_svm *svm)
{
+ trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
+
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
}
-static int task_switch_interception(struct vcpu_svm *svm,
- struct kvm_run *kvm_run)
+static int invalid_op_interception(struct vcpu_svm *svm)
+{
+ kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ return 1;
+}
+
+static int task_switch_interception(struct vcpu_svm *svm)
{
u16 tss_selector;
int reason;
@@ -2008,14 +2027,14 @@ static int task_switch_interception(struct vcpu_svm *svm,
return kvm_task_switch(&svm->vcpu, tss_selector, reason);
}
-static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int cpuid_interception(struct vcpu_svm *svm)
{
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
kvm_emulate_cpuid(&svm->vcpu);
return 1;
}
-static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int iret_interception(struct vcpu_svm *svm)
{
++svm->vcpu.stat.nmi_window_exits;
svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
@@ -2023,26 +2042,27 @@ static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
-static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int invlpg_interception(struct vcpu_svm *svm)
{
- if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
+ if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
return 1;
}
-static int emulate_on_interception(struct vcpu_svm *svm,
- struct kvm_run *kvm_run)
+static int emulate_on_interception(struct vcpu_svm *svm)
{
- if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
+ if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
return 1;
}
-static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int cr8_write_interception(struct vcpu_svm *svm)
{
+ struct kvm_run *kvm_run = svm->vcpu.run;
+
u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
/* instruction emulation calls kvm_set_cr8() */
- emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
+ emulate_instruction(&svm->vcpu, 0, 0, 0);
if (irqchip_in_kernel(svm->vcpu.kvm)) {
svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
return 1;
@@ -2128,7 +2148,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
return 0;
}
-static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int rdmsr_interception(struct vcpu_svm *svm)
{
u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
u64 data;
@@ -2221,7 +2241,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
return 0;
}
-static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int wrmsr_interception(struct vcpu_svm *svm)
{
u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
@@ -2237,17 +2257,18 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
-static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int msr_interception(struct vcpu_svm *svm)
{
if (svm->vmcb->control.exit_info_1)
- return wrmsr_interception(svm, kvm_run);
+ return wrmsr_interception(svm);
else
- return rdmsr_interception(svm, kvm_run);
+ return rdmsr_interception(svm);
}
-static int interrupt_window_interception(struct vcpu_svm *svm,
- struct kvm_run *kvm_run)
+static int interrupt_window_interception(struct vcpu_svm *svm)
{
+ struct kvm_run *kvm_run = svm->vcpu.run;
+
svm_clear_vintr(svm);
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
/*
@@ -2265,8 +2286,13 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
return 1;
}
-static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
- struct kvm_run *kvm_run) = {
+static int pause_interception(struct vcpu_svm *svm)
+{
+ kvm_vcpu_on_spin(&(svm->vcpu));
+ return 1;
+}
+
+static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_READ_CR0] = emulate_on_interception,
[SVM_EXIT_READ_CR3] = emulate_on_interception,
[SVM_EXIT_READ_CR4] = emulate_on_interception,
@@ -2301,6 +2327,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_CPUID] = cpuid_interception,
[SVM_EXIT_IRET] = iret_interception,
[SVM_EXIT_INVD] = emulate_on_interception,
+ [SVM_EXIT_PAUSE] = pause_interception,
[SVM_EXIT_HLT] = halt_interception,
[SVM_EXIT_INVLPG] = invlpg_interception,
[SVM_EXIT_INVLPGA] = invlpga_interception,
@@ -2314,26 +2341,36 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_VMSAVE] = vmsave_interception,
[SVM_EXIT_STGI] = stgi_interception,
[SVM_EXIT_CLGI] = clgi_interception,
- [SVM_EXIT_SKINIT] = invalid_op_interception,
+ [SVM_EXIT_SKINIT] = skinit_interception,
[SVM_EXIT_WBINVD] = emulate_on_interception,
[SVM_EXIT_MONITOR] = invalid_op_interception,
[SVM_EXIT_MWAIT] = invalid_op_interception,
[SVM_EXIT_NPF] = pf_interception,
};
-static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int handle_exit(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct kvm_run *kvm_run = vcpu->run;
u32 exit_code = svm->vmcb->control.exit_code;
trace_kvm_exit(exit_code, svm->vmcb->save.rip);
+ if (unlikely(svm->nested.exit_required)) {
+ nested_svm_vmexit(svm);
+ svm->nested.exit_required = false;
+
+ return 1;
+ }
+
if (is_nested(svm)) {
int vmexit;
- nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
- exit_code, svm->vmcb->control.exit_info_1,
- svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
+ trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
+ svm->vmcb->control.exit_info_1,
+ svm->vmcb->control.exit_info_2,
+ svm->vmcb->control.exit_int_info,
+ svm->vmcb->control.exit_int_info_err);
vmexit = nested_svm_exit_special(svm);
@@ -2383,15 +2420,15 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
return 0;
}
- return svm_exit_handlers[exit_code](svm, kvm_run);
+ return svm_exit_handlers[exit_code](svm);
}
static void reload_tss(struct kvm_vcpu *vcpu)
{
int cpu = raw_smp_processor_id();
- struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
- svm_data->tss_desc->type = 9; /* available 32/64-bit TSS */
+ struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+ sd->tss_desc->type = 9; /* available 32/64-bit TSS */
load_TR_desc();
}
@@ -2399,12 +2436,12 @@ static void pre_svm_run(struct vcpu_svm *svm)
{
int cpu = raw_smp_processor_id();
- struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
+ struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
/* FIXME: handle wraparound of asid_generation */
- if (svm->asid_generation != svm_data->asid_generation)
- new_asid(svm, svm_data);
+ if (svm->asid_generation != sd->asid_generation)
+ new_asid(svm, sd);
}
static void svm_inject_nmi(struct kvm_vcpu *vcpu)
@@ -2460,20 +2497,47 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
!(svm->vcpu.arch.hflags & HF_NMI_MASK);
}
+static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
+}
+
+static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (masked) {
+ svm->vcpu.arch.hflags |= HF_NMI_MASK;
+ svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+ } else {
+ svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
+ svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+ }
+}
+
static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb;
- return (vmcb->save.rflags & X86_EFLAGS_IF) &&
- !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
- gif_set(svm) &&
- !(is_nested(svm) && (svm->vcpu.arch.hflags & HF_VINTR_MASK));
+ int ret;
+
+ if (!gif_set(svm) ||
+ (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
+ return 0;
+
+ ret = !!(vmcb->save.rflags & X86_EFLAGS_IF);
+
+ if (is_nested(svm))
+ return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
+
+ return ret;
}
static void enable_irq_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- nsvm_printk("Trying to open IRQ window\n");
nested_svm_intr(svm);
@@ -2498,7 +2562,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
/* Something prevents NMI from been injected. Single step over
possible problem (IRET or exception injection or interrupt
shadow) */
- vcpu->arch.singlestep = true;
+ svm->nmi_singlestep = true;
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
update_db_intercept(vcpu);
}
@@ -2588,13 +2652,20 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
#define R "e"
#endif
-static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void svm_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
u16 fs_selector;
u16 gs_selector;
u16 ldt_selector;
+ /*
+ * A vmexit emulation is required before the vcpu can be executed
+ * again.
+ */
+ if (unlikely(svm->nested.exit_required))
+ return;
+
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
@@ -2893,6 +2964,8 @@ static struct kvm_x86_ops svm_x86_ops = {
.queue_exception = svm_queue_exception,
.interrupt_allowed = svm_interrupt_allowed,
.nmi_allowed = svm_nmi_allowed,
+ .get_nmi_mask = svm_get_nmi_mask,
+ .set_nmi_mask = svm_set_nmi_mask,
.enable_nmi_window = enable_nmi_window,
.enable_irq_window = enable_irq_window,
.update_cr8_intercept = update_cr8_intercept,
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 0d480e77eacf..816e0449db0b 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -349,6 +349,171 @@ TRACE_EVENT(kvm_apic_accept_irq,
__entry->coalesced ? " (coalesced)" : "")
);
+/*
+ * Tracepoint for nested VMRUN
+ */
+TRACE_EVENT(kvm_nested_vmrun,
+ TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl,
+ __u32 event_inj, bool npt),
+ TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, npt),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ __field( __u64, vmcb )
+ __field( __u64, nested_rip )
+ __field( __u32, int_ctl )
+ __field( __u32, event_inj )
+ __field( bool, npt )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip;
+ __entry->vmcb = vmcb;
+ __entry->nested_rip = nested_rip;
+ __entry->int_ctl = int_ctl;
+ __entry->event_inj = event_inj;
+ __entry->npt = npt;
+ ),
+
+ TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x "
+ "event_inj: 0x%08x npt: %s\n",
+ __entry->rip, __entry->vmcb, __entry->nested_rip,
+ __entry->int_ctl, __entry->event_inj,
+ __entry->npt ? "on" : "off")
+);
+
+/*
+ * Tracepoint for #VMEXIT while nested
+ */
+TRACE_EVENT(kvm_nested_vmexit,
+ TP_PROTO(__u64 rip, __u32 exit_code,
+ __u64 exit_info1, __u64 exit_info2,
+ __u32 exit_int_info, __u32 exit_int_info_err),
+ TP_ARGS(rip, exit_code, exit_info1, exit_info2,
+ exit_int_info, exit_int_info_err),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ __field( __u32, exit_code )
+ __field( __u64, exit_info1 )
+ __field( __u64, exit_info2 )
+ __field( __u32, exit_int_info )
+ __field( __u32, exit_int_info_err )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip;
+ __entry->exit_code = exit_code;
+ __entry->exit_info1 = exit_info1;
+ __entry->exit_info2 = exit_info2;
+ __entry->exit_int_info = exit_int_info;
+ __entry->exit_int_info_err = exit_int_info_err;
+ ),
+ TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx "
+ "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n",
+ __entry->rip,
+ ftrace_print_symbols_seq(p, __entry->exit_code,
+ kvm_x86_ops->exit_reasons_str),
+ __entry->exit_info1, __entry->exit_info2,
+ __entry->exit_int_info, __entry->exit_int_info_err)
+);
+
+/*
+ * Tracepoint for #VMEXIT reinjected to the guest
+ */
+TRACE_EVENT(kvm_nested_vmexit_inject,
+ TP_PROTO(__u32 exit_code,
+ __u64 exit_info1, __u64 exit_info2,
+ __u32 exit_int_info, __u32 exit_int_info_err),
+ TP_ARGS(exit_code, exit_info1, exit_info2,
+ exit_int_info, exit_int_info_err),
+
+ TP_STRUCT__entry(
+ __field( __u32, exit_code )
+ __field( __u64, exit_info1 )
+ __field( __u64, exit_info2 )
+ __field( __u32, exit_int_info )
+ __field( __u32, exit_int_info_err )
+ ),
+
+ TP_fast_assign(
+ __entry->exit_code = exit_code;
+ __entry->exit_info1 = exit_info1;
+ __entry->exit_info2 = exit_info2;
+ __entry->exit_int_info = exit_int_info;
+ __entry->exit_int_info_err = exit_int_info_err;
+ ),
+
+ TP_printk("reason: %s ext_inf1: 0x%016llx "
+ "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n",
+ ftrace_print_symbols_seq(p, __entry->exit_code,
+ kvm_x86_ops->exit_reasons_str),
+ __entry->exit_info1, __entry->exit_info2,
+ __entry->exit_int_info, __entry->exit_int_info_err)
+);
+
+/*
+ * Tracepoint for nested #vmexit because of interrupt pending
+ */
+TRACE_EVENT(kvm_nested_intr_vmexit,
+ TP_PROTO(__u64 rip),
+ TP_ARGS(rip),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip
+ ),
+
+ TP_printk("rip: 0x%016llx\n", __entry->rip)
+);
+
+/*
+ * Tracepoint for nested #vmexit because of interrupt pending
+ */
+TRACE_EVENT(kvm_invlpga,
+ TP_PROTO(__u64 rip, int asid, u64 address),
+ TP_ARGS(rip, asid, address),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ __field( int, asid )
+ __field( __u64, address )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip;
+ __entry->asid = asid;
+ __entry->address = address;
+ ),
+
+ TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n",
+ __entry->rip, __entry->asid, __entry->address)
+);
+
+/*
+ * Tracepoint for nested #vmexit because of interrupt pending
+ */
+TRACE_EVENT(kvm_skinit,
+ TP_PROTO(__u64 rip, __u32 slb),
+ TP_ARGS(rip, slb),
+
+ TP_STRUCT__entry(
+ __field( __u64, rip )
+ __field( __u32, slb )
+ ),
+
+ TP_fast_assign(
+ __entry->rip = rip;
+ __entry->slb = slb;
+ ),
+
+ TP_printk("rip: 0x%016llx slb: 0x%08x\n",
+ __entry->rip, __entry->slb)
+);
+
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ed53b42caba1..d4918d6fc924 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -61,12 +61,37 @@ module_param_named(unrestricted_guest,
static int __read_mostly emulate_invalid_guest_state = 0;
module_param(emulate_invalid_guest_state, bool, S_IRUGO);
+/*
+ * These 2 parameters are used to config the controls for Pause-Loop Exiting:
+ * ple_gap: upper bound on the amount of time between two successive
+ * executions of PAUSE in a loop. Also indicate if ple enabled.
+ * According to test, this time is usually small than 41 cycles.
+ * ple_window: upper bound on the amount of time a guest is allowed to execute
+ * in a PAUSE loop. Tests indicate that most spinlocks are held for
+ * less than 2^12 cycles
+ * Time is measured based on a counter that runs at the same rate as the TSC,
+ * refer SDM volume 3b section 21.6.13 & 22.1.3.
+ */
+#define KVM_VMX_DEFAULT_PLE_GAP 41
+#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
+static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
+module_param(ple_gap, int, S_IRUGO);
+
+static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
+module_param(ple_window, int, S_IRUGO);
+
struct vmcs {
u32 revision_id;
u32 abort;
char data[0];
};
+struct shared_msr_entry {
+ unsigned index;
+ u64 data;
+ u64 mask;
+};
+
struct vcpu_vmx {
struct kvm_vcpu vcpu;
struct list_head local_vcpus_link;
@@ -74,13 +99,12 @@ struct vcpu_vmx {
int launched;
u8 fail;
u32 idt_vectoring_info;
- struct kvm_msr_entry *guest_msrs;
- struct kvm_msr_entry *host_msrs;
+ struct shared_msr_entry *guest_msrs;
int nmsrs;
int save_nmsrs;
- int msr_offset_efer;
#ifdef CONFIG_X86_64
- int msr_offset_kernel_gs_base;
+ u64 msr_host_kernel_gs_base;
+ u64 msr_guest_kernel_gs_base;
#endif
struct vmcs *vmcs;
struct {
@@ -88,7 +112,6 @@ struct vcpu_vmx {
u16 fs_sel, gs_sel, ldt_sel;
int gs_ldt_reload_needed;
int fs_reload_needed;
- int guest_efer_loaded;
} host_state;
struct {
int vm86_active;
@@ -107,7 +130,6 @@ struct vcpu_vmx {
} rmode;
int vpid;
bool emulation_required;
- enum emulation_result invalid_state_emulation_result;
/* Support for vnmi-less CPUs */
int soft_vnmi_blocked;
@@ -176,6 +198,8 @@ static struct kvm_vmx_segment_field {
VMX_SEGMENT_FIELD(LDTR),
};
+static u64 host_efer;
+
static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
/*
@@ -184,28 +208,12 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
*/
static const u32 vmx_msr_index[] = {
#ifdef CONFIG_X86_64
- MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE,
+ MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
#endif
MSR_EFER, MSR_K6_STAR,
};
#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
-static void load_msrs(struct kvm_msr_entry *e, int n)
-{
- int i;
-
- for (i = 0; i < n; ++i)
- wrmsrl(e[i].index, e[i].data);
-}
-
-static void save_msrs(struct kvm_msr_entry *e, int n)
-{
- int i;
-
- for (i = 0; i < n; ++i)
- rdmsrl(e[i].index, e[i].data);
-}
-
static inline int is_page_fault(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -320,6 +328,12 @@ static inline int cpu_has_vmx_unrestricted_guest(void)
SECONDARY_EXEC_UNRESTRICTED_GUEST;
}
+static inline int cpu_has_vmx_ple(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+}
+
static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
{
return flexpriority_enabled &&
@@ -348,7 +362,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
int i;
for (i = 0; i < vmx->nmsrs; ++i)
- if (vmx->guest_msrs[i].index == msr)
+ if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
return i;
return -1;
}
@@ -379,7 +393,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa)
: : "a" (&operand), "c" (ext) : "cc", "memory");
}
-static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
+static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
{
int i;
@@ -570,17 +584,12 @@ static void reload_tss(void)
load_TR_desc();
}
-static void load_transition_efer(struct vcpu_vmx *vmx)
+static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
{
- int efer_offset = vmx->msr_offset_efer;
- u64 host_efer;
u64 guest_efer;
u64 ignore_bits;
- if (efer_offset < 0)
- return;
- host_efer = vmx->host_msrs[efer_offset].data;
- guest_efer = vmx->guest_msrs[efer_offset].data;
+ guest_efer = vmx->vcpu.arch.shadow_efer;
/*
* NX is emulated; LMA and LME handled by hardware; SCE meaninless
@@ -593,27 +602,17 @@ static void load_transition_efer(struct vcpu_vmx *vmx)
if (guest_efer & EFER_LMA)
ignore_bits &= ~(u64)EFER_SCE;
#endif
- if ((guest_efer & ~ignore_bits) == (host_efer & ~ignore_bits))
- return;
-
- vmx->host_state.guest_efer_loaded = 1;
guest_efer &= ~ignore_bits;
guest_efer |= host_efer & ignore_bits;
- wrmsrl(MSR_EFER, guest_efer);
- vmx->vcpu.stat.efer_reload++;
-}
-
-static void reload_host_efer(struct vcpu_vmx *vmx)
-{
- if (vmx->host_state.guest_efer_loaded) {
- vmx->host_state.guest_efer_loaded = 0;
- load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1);
- }
+ vmx->guest_msrs[efer_offset].data = guest_efer;
+ vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
+ return true;
}
static void vmx_save_host_state(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int i;
if (vmx->host_state.loaded)
return;
@@ -650,13 +649,15 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
#endif
#ifdef CONFIG_X86_64
- if (is_long_mode(&vmx->vcpu))
- save_msrs(vmx->host_msrs +
- vmx->msr_offset_kernel_gs_base, 1);
-
+ if (is_long_mode(&vmx->vcpu)) {
+ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+ }
#endif
- load_msrs(vmx->guest_msrs, vmx->save_nmsrs);
- load_transition_efer(vmx);
+ for (i = 0; i < vmx->save_nmsrs; ++i)
+ kvm_set_shared_msr(vmx->guest_msrs[i].index,
+ vmx->guest_msrs[i].data,
+ vmx->guest_msrs[i].mask);
}
static void __vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -684,9 +685,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
local_irq_restore(flags);
}
reload_tss();
- save_msrs(vmx->guest_msrs, vmx->save_nmsrs);
- load_msrs(vmx->host_msrs, vmx->save_nmsrs);
- reload_host_efer(vmx);
+#ifdef CONFIG_X86_64
+ if (is_long_mode(&vmx->vcpu)) {
+ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+ }
+#endif
}
static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -877,19 +881,14 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
/*
* Swap MSR entry in host/guest MSR entry array.
*/
-#ifdef CONFIG_X86_64
static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
{
- struct kvm_msr_entry tmp;
+ struct shared_msr_entry tmp;
tmp = vmx->guest_msrs[to];
vmx->guest_msrs[to] = vmx->guest_msrs[from];
vmx->guest_msrs[from] = tmp;
- tmp = vmx->host_msrs[to];
- vmx->host_msrs[to] = vmx->host_msrs[from];
- vmx->host_msrs[from] = tmp;
}
-#endif
/*
* Set up the vmcs to automatically save and restore system
@@ -898,15 +897,13 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
*/
static void setup_msrs(struct vcpu_vmx *vmx)
{
- int save_nmsrs;
+ int save_nmsrs, index;
unsigned long *msr_bitmap;
vmx_load_host_state(vmx);
save_nmsrs = 0;
#ifdef CONFIG_X86_64
if (is_long_mode(&vmx->vcpu)) {
- int index;
-
index = __find_msr_index(vmx, MSR_SYSCALL_MASK);
if (index >= 0)
move_msr_up(vmx, index, save_nmsrs++);
@@ -916,9 +913,6 @@ static void setup_msrs(struct vcpu_vmx *vmx)
index = __find_msr_index(vmx, MSR_CSTAR);
if (index >= 0)
move_msr_up(vmx, index, save_nmsrs++);
- index = __find_msr_index(vmx, MSR_KERNEL_GS_BASE);
- if (index >= 0)
- move_msr_up(vmx, index, save_nmsrs++);
/*
* MSR_K6_STAR is only needed on long mode guests, and only
* if efer.sce is enabled.
@@ -928,13 +922,11 @@ static void setup_msrs(struct vcpu_vmx *vmx)
move_msr_up(vmx, index, save_nmsrs++);
}
#endif
- vmx->save_nmsrs = save_nmsrs;
+ index = __find_msr_index(vmx, MSR_EFER);
+ if (index >= 0 && update_transition_efer(vmx, index))
+ move_msr_up(vmx, index, save_nmsrs++);
-#ifdef CONFIG_X86_64
- vmx->msr_offset_kernel_gs_base =
- __find_msr_index(vmx, MSR_KERNEL_GS_BASE);
-#endif
- vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER);
+ vmx->save_nmsrs = save_nmsrs;
if (cpu_has_vmx_msr_bitmap()) {
if (is_long_mode(&vmx->vcpu))
@@ -976,7 +968,7 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
{
u64 data;
- struct kvm_msr_entry *msr;
+ struct shared_msr_entry *msr;
if (!pdata) {
printk(KERN_ERR "BUG: get_msr called with NULL pdata\n");
@@ -991,9 +983,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
case MSR_GS_BASE:
data = vmcs_readl(GUEST_GS_BASE);
break;
+ case MSR_KERNEL_GS_BASE:
+ vmx_load_host_state(to_vmx(vcpu));
+ data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
+ break;
+#endif
case MSR_EFER:
return kvm_get_msr_common(vcpu, msr_index, pdata);
-#endif
case MSR_IA32_TSC:
data = guest_read_tsc();
break;
@@ -1007,6 +1003,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
default:
+ vmx_load_host_state(to_vmx(vcpu));
msr = find_msr_entry(to_vmx(vcpu), msr_index);
if (msr) {
vmx_load_host_state(to_vmx(vcpu));
@@ -1028,7 +1025,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct kvm_msr_entry *msr;
+ struct shared_msr_entry *msr;
u64 host_tsc;
int ret = 0;
@@ -1044,6 +1041,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
case MSR_GS_BASE:
vmcs_writel(GUEST_GS_BASE, data);
break;
+ case MSR_KERNEL_GS_BASE:
+ vmx_load_host_state(vmx);
+ vmx->msr_guest_kernel_gs_base = data;
+ break;
#endif
case MSR_IA32_SYSENTER_CS:
vmcs_write32(GUEST_SYSENTER_CS, data);
@@ -1097,30 +1098,14 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
}
}
-static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+static void set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
{
- int old_debug = vcpu->guest_debug;
- unsigned long flags;
-
- vcpu->guest_debug = dbg->control;
- if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
- vcpu->guest_debug = 0;
-
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]);
else
vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
- flags = vmcs_readl(GUEST_RFLAGS);
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
- else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
- flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
- vmcs_writel(GUEST_RFLAGS, flags);
-
update_exception_bitmap(vcpu);
-
- return 0;
}
static __init int cpu_has_kvm_support(void)
@@ -1139,12 +1124,15 @@ static __init int vmx_disabled_by_bios(void)
/* locked but not enabled */
}
-static void hardware_enable(void *garbage)
+static int hardware_enable(void *garbage)
{
int cpu = raw_smp_processor_id();
u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
u64 old;
+ if (read_cr4() & X86_CR4_VMXE)
+ return -EBUSY;
+
INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
if ((old & (FEATURE_CONTROL_LOCKED |
@@ -1159,6 +1147,10 @@ static void hardware_enable(void *garbage)
asm volatile (ASM_VMX_VMXON_RAX
: : "a"(&phys_addr), "m"(phys_addr)
: "memory", "cc");
+
+ ept_sync_global();
+
+ return 0;
}
static void vmclear_local_vcpus(void)
@@ -1250,7 +1242,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_WBINVD_EXITING |
SECONDARY_EXEC_ENABLE_VPID |
SECONDARY_EXEC_ENABLE_EPT |
- SECONDARY_EXEC_UNRESTRICTED_GUEST;
+ SECONDARY_EXEC_UNRESTRICTED_GUEST |
+ SECONDARY_EXEC_PAUSE_LOOP_EXITING;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0)
@@ -1344,15 +1337,17 @@ static void free_kvm_area(void)
{
int cpu;
- for_each_online_cpu(cpu)
+ for_each_possible_cpu(cpu) {
free_vmcs(per_cpu(vmxarea, cpu));
+ per_cpu(vmxarea, cpu) = NULL;
+ }
}
static __init int alloc_kvm_area(void)
{
int cpu;
- for_each_online_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
struct vmcs *vmcs;
vmcs = alloc_vmcs_cpu(cpu);
@@ -1394,6 +1389,9 @@ static __init int hardware_setup(void)
if (enable_ept && !cpu_has_vmx_ept_2m_page())
kvm_disable_largepages();
+ if (!cpu_has_vmx_ple())
+ ple_gap = 0;
+
return alloc_kvm_area();
}
@@ -1536,8 +1534,16 @@ continue_rmode:
static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
+ struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
+
+ if (!msr)
+ return;
+ /*
+ * Force kernel_gs_base reloading before EFER changes, as control
+ * of this msr depends on is_long_mode().
+ */
+ vmx_load_host_state(to_vmx(vcpu));
vcpu->arch.shadow_efer = efer;
if (!msr)
return;
@@ -1727,6 +1733,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
vmcs_write64(EPT_POINTER, eptp);
guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
vcpu->kvm->arch.ept_identity_map_addr;
+ ept_load_pdptrs(vcpu);
}
vmx_flush_tlb(vcpu);
@@ -2302,13 +2309,22 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
if (vmx->vpid == 0)
exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
- if (!enable_ept)
+ if (!enable_ept) {
exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
+ enable_unrestricted_guest = 0;
+ }
if (!enable_unrestricted_guest)
exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
+ if (!ple_gap)
+ exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
}
+ if (ple_gap) {
+ vmcs_write32(PLE_GAP, ple_gap);
+ vmcs_write32(PLE_WINDOW, ple_window);
+ }
+
vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf);
vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf);
vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
@@ -2376,10 +2392,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
if (wrmsr_safe(index, data_low, data_high) < 0)
continue;
data = data_low | ((u64)data_high << 32);
- vmx->host_msrs[j].index = index;
- vmx->host_msrs[j].reserved = 0;
- vmx->host_msrs[j].data = data;
- vmx->guest_msrs[j] = vmx->host_msrs[j];
+ vmx->guest_msrs[j].index = i;
+ vmx->guest_msrs[j].data = 0;
+ vmx->guest_msrs[j].mask = -1ull;
++vmx->nmsrs;
}
@@ -2510,7 +2525,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
if (vmx->vpid != 0)
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
- vmx->vcpu.arch.cr0 = 0x60000010;
+ vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */
vmx_set_cr4(&vmx->vcpu, 0);
vmx_set_efer(&vmx->vcpu, 0);
@@ -2627,6 +2642,34 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
GUEST_INTR_STATE_NMI));
}
+static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
+{
+ if (!cpu_has_virtual_nmis())
+ return to_vmx(vcpu)->soft_vnmi_blocked;
+ else
+ return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+ GUEST_INTR_STATE_NMI);
+}
+
+static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (!cpu_has_virtual_nmis()) {
+ if (vmx->soft_vnmi_blocked != masked) {
+ vmx->soft_vnmi_blocked = masked;
+ vmx->vnmi_blocked_time = 0;
+ }
+ } else {
+ if (masked)
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ else
+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ }
+}
+
static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
{
return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
@@ -2659,7 +2702,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
* Cause the #SS fault with 0 error code in VM86 mode.
*/
if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0)
- if (emulate_instruction(vcpu, NULL, 0, 0, 0) == EMULATE_DONE)
+ if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE)
return 1;
/*
* Forward all other exceptions that are valid in real mode.
@@ -2710,15 +2753,16 @@ static void kvm_machine_check(void)
#endif
}
-static int handle_machine_check(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_machine_check(struct kvm_vcpu *vcpu)
{
/* already handled by vcpu_run */
return 1;
}
-static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_exception(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct kvm_run *kvm_run = vcpu->run;
u32 intr_info, ex_no, error_code;
unsigned long cr2, rip, dr6;
u32 vect_info;
@@ -2728,12 +2772,17 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
if (is_machine_check(intr_info))
- return handle_machine_check(vcpu, kvm_run);
+ return handle_machine_check(vcpu);
if ((vect_info & VECTORING_INFO_VALID_MASK) &&
- !is_page_fault(intr_info))
- printk(KERN_ERR "%s: unexpected, vectoring info 0x%x "
- "intr info 0x%x\n", __func__, vect_info, intr_info);
+ !is_page_fault(intr_info)) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
+ vcpu->run->internal.ndata = 2;
+ vcpu->run->internal.data[0] = vect_info;
+ vcpu->run->internal.data[1] = intr_info;
+ return 0;
+ }
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
return 1; /* already handled by vmx_vcpu_run() */
@@ -2744,7 +2793,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
if (is_invalid_opcode(intr_info)) {
- er = emulate_instruction(vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD);
+ er = emulate_instruction(vcpu, 0, 0, EMULTYPE_TRAP_UD);
if (er != EMULATE_DONE)
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
@@ -2803,20 +2852,19 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 0;
}
-static int handle_external_interrupt(struct kvm_vcpu *vcpu,
- struct kvm_run *kvm_run)
+static int handle_external_interrupt(struct kvm_vcpu *vcpu)
{
++vcpu->stat.irq_exits;
return 1;
}
-static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_triple_fault(struct kvm_vcpu *vcpu)
{
- kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
+ vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
return 0;
}
-static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_io(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification;
int size, in, string;
@@ -2827,8 +2875,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
string = (exit_qualification & 16) != 0;
if (string) {
- if (emulate_instruction(vcpu,
- kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
+ if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO)
return 0;
return 1;
}
@@ -2838,7 +2885,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
port = exit_qualification >> 16;
skip_emulated_instruction(vcpu);
- return kvm_emulate_pio(vcpu, kvm_run, in, size, port);
+ return kvm_emulate_pio(vcpu, in, size, port);
}
static void
@@ -2852,7 +2899,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
hypercall[2] = 0xc1;
}
-static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_cr(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification, val;
int cr;
@@ -2887,7 +2934,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
if (cr8_prev <= cr8)
return 1;
- kvm_run->exit_reason = KVM_EXIT_SET_TPR;
+ vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
return 0;
}
};
@@ -2922,13 +2969,13 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
default:
break;
}
- kvm_run->exit_reason = 0;
+ vcpu->run->exit_reason = 0;
pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
(int)(exit_qualification >> 4) & 3, cr);
return 0;
}
-static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_dr(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification;
unsigned long val;
@@ -2944,13 +2991,13 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
* guest debugging itself.
*/
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
- kvm_run->debug.arch.dr6 = vcpu->arch.dr6;
- kvm_run->debug.arch.dr7 = dr;
- kvm_run->debug.arch.pc =
+ vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
+ vcpu->run->debug.arch.dr7 = dr;
+ vcpu->run->debug.arch.pc =
vmcs_readl(GUEST_CS_BASE) +
vmcs_readl(GUEST_RIP);
- kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ vcpu->run->debug.arch.exception = DB_VECTOR;
+ vcpu->run->exit_reason = KVM_EXIT_DEBUG;
return 0;
} else {
vcpu->arch.dr7 &= ~DR7_GD;
@@ -3016,13 +3063,13 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
-static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_cpuid(struct kvm_vcpu *vcpu)
{
kvm_emulate_cpuid(vcpu);
return 1;
}
-static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_rdmsr(struct kvm_vcpu *vcpu)
{
u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
u64 data;
@@ -3041,7 +3088,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
-static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_wrmsr(struct kvm_vcpu *vcpu)
{
u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
@@ -3058,14 +3105,12 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
-static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu,
- struct kvm_run *kvm_run)
+static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
{
return 1;
}
-static int handle_interrupt_window(struct kvm_vcpu *vcpu,
- struct kvm_run *kvm_run)
+static int handle_interrupt_window(struct kvm_vcpu *vcpu)
{
u32 cpu_based_vm_exec_control;
@@ -3081,34 +3126,34 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
* possible
*/
if (!irqchip_in_kernel(vcpu->kvm) &&
- kvm_run->request_interrupt_window &&
+ vcpu->run->request_interrupt_window &&
!kvm_cpu_has_interrupt(vcpu)) {
- kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
+ vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
return 0;
}
return 1;
}
-static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_halt(struct kvm_vcpu *vcpu)
{
skip_emulated_instruction(vcpu);
return kvm_emulate_halt(vcpu);
}
-static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_vmcall(struct kvm_vcpu *vcpu)
{
skip_emulated_instruction(vcpu);
kvm_emulate_hypercall(vcpu);
return 1;
}
-static int handle_vmx_insn(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_vmx_insn(struct kvm_vcpu *vcpu)
{
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
-static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_invlpg(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -3117,14 +3162,14 @@ static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
-static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_wbinvd(struct kvm_vcpu *vcpu)
{
skip_emulated_instruction(vcpu);
/* TODO: Add support for VT-d/pass-through device */
return 1;
}
-static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_apic_access(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification;
enum emulation_result er;
@@ -3133,7 +3178,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
offset = exit_qualification & 0xffful;
- er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
+ er = emulate_instruction(vcpu, 0, 0, 0);
if (er != EMULATE_DONE) {
printk(KERN_ERR
@@ -3144,7 +3189,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
-static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_task_switch(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long exit_qualification;
@@ -3198,7 +3243,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
-static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_ept_violation(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification;
gpa_t gpa;
@@ -3219,8 +3264,8 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vmcs_readl(GUEST_LINEAR_ADDRESS));
printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
(long unsigned int)exit_qualification);
- kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
- kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION;
+ vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
+ vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION;
return 0;
}
@@ -3290,7 +3335,7 @@ static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte,
}
}
-static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
{
u64 sptes[4];
int nr_sptes, i;
@@ -3306,13 +3351,13 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i)
ept_misconfig_inspect_spte(vcpu, sptes[i-1], i);
- kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
- kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
+ vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
+ vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
return 0;
}
-static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int handle_nmi_window(struct kvm_vcpu *vcpu)
{
u32 cpu_based_vm_exec_control;
@@ -3325,36 +3370,50 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
-static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
- struct kvm_run *kvm_run)
+static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
enum emulation_result err = EMULATE_DONE;
-
- local_irq_enable();
- preempt_enable();
+ int ret = 1;
while (!guest_state_valid(vcpu)) {
- err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
+ err = emulate_instruction(vcpu, 0, 0, 0);
- if (err == EMULATE_DO_MMIO)
- break;
+ if (err == EMULATE_DO_MMIO) {
+ ret = 0;
+ goto out;
+ }
if (err != EMULATE_DONE) {
kvm_report_emulation_failure(vcpu, "emulation failure");
- break;
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+ ret = 0;
+ goto out;
}
if (signal_pending(current))
- break;
+ goto out;
if (need_resched())
schedule();
}
- preempt_disable();
- local_irq_disable();
+ vmx->emulation_required = 0;
+out:
+ return ret;
+}
- vmx->invalid_state_emulation_result = err;
+/*
+ * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
+ * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
+ */
+static int handle_pause(struct kvm_vcpu *vcpu)
+{
+ skip_emulated_instruction(vcpu);
+ kvm_vcpu_on_spin(vcpu);
+
+ return 1;
}
/*
@@ -3362,8 +3421,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
* to be done to userspace and return 0.
*/
-static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
- struct kvm_run *kvm_run) = {
+static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_EXCEPTION_NMI] = handle_exception,
[EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
[EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
@@ -3394,6 +3452,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
[EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
[EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
+ [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
};
static const int kvm_vmx_max_exit_handlers =
@@ -3403,7 +3462,7 @@ static const int kvm_vmx_max_exit_handlers =
* The guest has exited. See if we can fix it or if we need userspace
* assistance.
*/
-static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int vmx_handle_exit(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exit_reason = vmx->exit_reason;
@@ -3411,13 +3470,9 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
trace_kvm_exit(exit_reason, kvm_rip_read(vcpu));
- /* If we need to emulate an MMIO from handle_invalid_guest_state
- * we just return 0 */
- if (vmx->emulation_required && emulate_invalid_guest_state) {
- if (guest_state_valid(vcpu))
- vmx->emulation_required = 0;
- return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO;
- }
+ /* If guest state is invalid, start emulating */
+ if (vmx->emulation_required && emulate_invalid_guest_state)
+ return handle_invalid_guest_state(vcpu);
/* Access CR3 don't cause VMExit in paging mode, so we need
* to sync with guest real CR3. */
@@ -3425,8 +3480,8 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
if (unlikely(vmx->fail)) {
- kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
- kvm_run->fail_entry.hardware_entry_failure_reason
+ vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ vcpu->run->fail_entry.hardware_entry_failure_reason
= vmcs_read32(VM_INSTRUCTION_ERROR);
return 0;
}
@@ -3459,10 +3514,10 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason])
- return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
+ return kvm_vmx_exit_handlers[exit_reason](vcpu);
else {
- kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
- kvm_run->hw.hardware_exit_reason = exit_reason;
+ vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
+ vcpu->run->hw.hardware_exit_reason = exit_reason;
}
return 0;
}
@@ -3600,23 +3655,18 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx)
#define Q "l"
#endif
-static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (enable_ept && is_paging(vcpu)) {
- vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
- ept_load_pdptrs(vcpu);
- }
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
vmx->entry_time = ktime_get();
- /* Handle invalid guest state instead of entering VMX */
- if (vmx->emulation_required && emulate_invalid_guest_state) {
- handle_invalid_guest_state(vcpu, kvm_run);
+ /* Don't enter VMX if guest state is invalid, let the exit handler
+ start emulation until we arrive back to a valid state */
+ if (vmx->emulation_required && emulate_invalid_guest_state)
return;
- }
if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
@@ -3775,7 +3825,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
__clear_bit(vmx->vpid, vmx_vpid_bitmap);
spin_unlock(&vmx_vpid_lock);
vmx_free_vmcs(vcpu);
- kfree(vmx->host_msrs);
kfree(vmx->guest_msrs);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vmx);
@@ -3802,10 +3851,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
goto uninit_vcpu;
}
- vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!vmx->host_msrs)
- goto free_guest_msrs;
-
vmx->vmcs = alloc_vmcs();
if (!vmx->vmcs)
goto free_msrs;
@@ -3836,8 +3881,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
free_vmcs:
free_vmcs(vmx->vmcs);
free_msrs:
- kfree(vmx->host_msrs);
-free_guest_msrs:
kfree(vmx->guest_msrs);
uninit_vcpu:
kvm_vcpu_uninit(&vmx->vcpu);
@@ -3973,6 +4016,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.queue_exception = vmx_queue_exception,
.interrupt_allowed = vmx_interrupt_allowed,
.nmi_allowed = vmx_nmi_allowed,
+ .get_nmi_mask = vmx_get_nmi_mask,
+ .set_nmi_mask = vmx_set_nmi_mask,
.enable_nmi_window = enable_nmi_window,
.enable_irq_window = enable_irq_window,
.update_cr8_intercept = update_cr8_intercept,
@@ -3987,7 +4032,12 @@ static struct kvm_x86_ops vmx_x86_ops = {
static int __init vmx_init(void)
{
- int r;
+ int r, i;
+
+ rdmsrl_safe(MSR_EFER, &host_efer);
+
+ for (i = 0; i < NR_VMX_MSR; ++i)
+ kvm_define_shared_msr(i, vmx_msr_index[i]);
vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_io_bitmap_a)
@@ -4049,8 +4099,6 @@ static int __init vmx_init(void)
if (bypass_guest_pf)
kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
- ept_sync_global();
-
return 0;
out3:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4fc80174191c..9d068966fb2a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -37,6 +37,7 @@
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/cpufreq.h>
+#include <linux/user-return-notifier.h>
#include <trace/events/kvm.h>
#undef TRACE_INCLUDE_FILE
#define CREATE_TRACE_POINTS
@@ -88,6 +89,25 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
int ignore_msrs = 0;
module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
+#define KVM_NR_SHARED_MSRS 16
+
+struct kvm_shared_msrs_global {
+ int nr;
+ struct kvm_shared_msr {
+ u32 msr;
+ u64 value;
+ } msrs[KVM_NR_SHARED_MSRS];
+};
+
+struct kvm_shared_msrs {
+ struct user_return_notifier urn;
+ bool registered;
+ u64 current_value[KVM_NR_SHARED_MSRS];
+};
+
+static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
+static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs);
+
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "pf_fixed", VCPU_STAT(pf_fixed) },
{ "pf_guest", VCPU_STAT(pf_guest) },
@@ -124,6 +144,72 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
+static void kvm_on_user_return(struct user_return_notifier *urn)
+{
+ unsigned slot;
+ struct kvm_shared_msr *global;
+ struct kvm_shared_msrs *locals
+ = container_of(urn, struct kvm_shared_msrs, urn);
+
+ for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
+ global = &shared_msrs_global.msrs[slot];
+ if (global->value != locals->current_value[slot]) {
+ wrmsrl(global->msr, global->value);
+ locals->current_value[slot] = global->value;
+ }
+ }
+ locals->registered = false;
+ user_return_notifier_unregister(urn);
+}
+
+void kvm_define_shared_msr(unsigned slot, u32 msr)
+{
+ int cpu;
+ u64 value;
+
+ if (slot >= shared_msrs_global.nr)
+ shared_msrs_global.nr = slot + 1;
+ shared_msrs_global.msrs[slot].msr = msr;
+ rdmsrl_safe(msr, &value);
+ shared_msrs_global.msrs[slot].value = value;
+ for_each_online_cpu(cpu)
+ per_cpu(shared_msrs, cpu).current_value[slot] = value;
+}
+EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
+
+static void kvm_shared_msr_cpu_online(void)
+{
+ unsigned i;
+ struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs);
+
+ for (i = 0; i < shared_msrs_global.nr; ++i)
+ locals->current_value[i] = shared_msrs_global.msrs[i].value;
+}
+
+void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
+{
+ struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
+
+ if (((value ^ smsr->current_value[slot]) & mask) == 0)
+ return;
+ smsr->current_value[slot] = value;
+ wrmsrl(shared_msrs_global.msrs[slot].msr, value);
+ if (!smsr->registered) {
+ smsr->urn.on_user_return = kvm_on_user_return;
+ user_return_notifier_register(&smsr->urn);
+ smsr->registered = true;
+ }
+}
+EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
+
+static void drop_user_return_notifiers(void *ignore)
+{
+ struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
+
+ if (smsr->registered)
+ kvm_on_user_return(&smsr->urn);
+}
+
unsigned long segment_base(u16 selector)
{
struct descriptor_table gdt;
@@ -485,16 +571,19 @@ static inline u32 bit(int bitno)
* and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
*
* This list is modified at module load time to reflect the
- * capabilities of the host cpu.
+ * capabilities of the host cpu. This capabilities test skips MSRs that are
+ * kvm-specific. Those are put in the beginning of the list.
*/
+
+#define KVM_SAVE_MSRS_BEGIN 2
static u32 msrs_to_save[] = {
+ MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
MSR_K6_STAR,
#ifdef CONFIG_X86_64
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
- MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
- MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
+ MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
};
static unsigned num_msrs_to_save;
@@ -678,7 +767,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
/* With all the info we got, fill in the values */
vcpu->hv_clock.system_time = ts.tv_nsec +
- (NSEC_PER_SEC * (u64)ts.tv_sec);
+ (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
+
/*
* The interface expects us to write an even number signaling that the
* update is finished. Since the guest won't see the intermediate
@@ -836,6 +926,38 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
return 0;
}
+static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
+{
+ struct kvm *kvm = vcpu->kvm;
+ int lm = is_long_mode(vcpu);
+ u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
+ : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
+ u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
+ : kvm->arch.xen_hvm_config.blob_size_32;
+ u32 page_num = data & ~PAGE_MASK;
+ u64 page_addr = data & PAGE_MASK;
+ u8 *page;
+ int r;
+
+ r = -E2BIG;
+ if (page_num >= blob_size)
+ goto out;
+ r = -ENOMEM;
+ page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!page)
+ goto out;
+ r = -EFAULT;
+ if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE))
+ goto out_free;
+ if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
+ goto out_free;
+ r = 0;
+out_free:
+ kfree(page);
+out:
+ return r;
+}
+
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
switch (msr) {
@@ -951,6 +1073,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
"0x%x data 0x%llx\n", msr, data);
break;
default:
+ if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
+ return xen_hvm_config(vcpu, data);
if (!ignore_msrs) {
pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
msr, data);
@@ -1225,6 +1349,9 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PIT2:
case KVM_CAP_PIT_STATE2:
case KVM_CAP_SET_IDENTITY_MAP_ADDR:
+ case KVM_CAP_XEN_HVM:
+ case KVM_CAP_ADJUST_CLOCK:
+ case KVM_CAP_VCPU_EVENTS:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -1239,8 +1366,8 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_NR_MEMSLOTS:
r = KVM_MEMORY_SLOTS;
break;
- case KVM_CAP_PV_MMU:
- r = !tdp_enabled;
+ case KVM_CAP_PV_MMU: /* obsolete */
+ r = 0;
break;
case KVM_CAP_IOMMU:
r = iommu_found();
@@ -1327,6 +1454,12 @@ out:
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
kvm_x86_ops->vcpu_load(vcpu, cpu);
+ if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
+ unsigned long khz = cpufreq_quick_get(cpu);
+ if (!khz)
+ khz = tsc_khz;
+ per_cpu(cpu_tsc_khz, cpu) = khz;
+ }
kvm_request_guest_time_update(vcpu);
}
@@ -1760,6 +1893,61 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
return 0;
}
+static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ vcpu_load(vcpu);
+
+ events->exception.injected = vcpu->arch.exception.pending;
+ events->exception.nr = vcpu->arch.exception.nr;
+ events->exception.has_error_code = vcpu->arch.exception.has_error_code;
+ events->exception.error_code = vcpu->arch.exception.error_code;
+
+ events->interrupt.injected = vcpu->arch.interrupt.pending;
+ events->interrupt.nr = vcpu->arch.interrupt.nr;
+ events->interrupt.soft = vcpu->arch.interrupt.soft;
+
+ events->nmi.injected = vcpu->arch.nmi_injected;
+ events->nmi.pending = vcpu->arch.nmi_pending;
+ events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
+
+ events->sipi_vector = vcpu->arch.sipi_vector;
+
+ events->flags = 0;
+
+ vcpu_put(vcpu);
+}
+
+static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ if (events->flags)
+ return -EINVAL;
+
+ vcpu_load(vcpu);
+
+ vcpu->arch.exception.pending = events->exception.injected;
+ vcpu->arch.exception.nr = events->exception.nr;
+ vcpu->arch.exception.has_error_code = events->exception.has_error_code;
+ vcpu->arch.exception.error_code = events->exception.error_code;
+
+ vcpu->arch.interrupt.pending = events->interrupt.injected;
+ vcpu->arch.interrupt.nr = events->interrupt.nr;
+ vcpu->arch.interrupt.soft = events->interrupt.soft;
+ if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm))
+ kvm_pic_clear_isr_ack(vcpu->kvm);
+
+ vcpu->arch.nmi_injected = events->nmi.injected;
+ vcpu->arch.nmi_pending = events->nmi.pending;
+ kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
+
+ vcpu->arch.sipi_vector = events->sipi_vector;
+
+ vcpu_put(vcpu);
+
+ return 0;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -1770,6 +1958,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
switch (ioctl) {
case KVM_GET_LAPIC: {
+ r = -EINVAL;
+ if (!vcpu->arch.apic)
+ goto out;
lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
r = -ENOMEM;
@@ -1785,6 +1976,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_SET_LAPIC: {
+ r = -EINVAL;
+ if (!vcpu->arch.apic)
+ goto out;
lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
r = -ENOMEM;
if (!lapic)
@@ -1911,6 +2105,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
break;
}
+ case KVM_GET_VCPU_EVENTS: {
+ struct kvm_vcpu_events events;
+
+ kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
+
+ r = -EFAULT;
+ if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
+ break;
+ r = 0;
+ break;
+ }
+ case KVM_SET_VCPU_EVENTS: {
+ struct kvm_vcpu_events events;
+
+ r = -EFAULT;
+ if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
+ break;
+
+ r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
+ break;
+ }
default:
r = -EINVAL;
}
@@ -2039,9 +2254,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
sizeof(struct kvm_pic_state));
break;
case KVM_IRQCHIP_IOAPIC:
- memcpy(&chip->chip.ioapic,
- ioapic_irqchip(kvm),
- sizeof(struct kvm_ioapic_state));
+ r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
break;
default:
r = -EINVAL;
@@ -2071,11 +2284,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
spin_unlock(&pic_irqchip(kvm)->lock);
break;
case KVM_IRQCHIP_IOAPIC:
- mutex_lock(&kvm->irq_lock);
- memcpy(ioapic_irqchip(kvm),
- &chip->chip.ioapic,
- sizeof(struct kvm_ioapic_state));
- mutex_unlock(&kvm->irq_lock);
+ r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
break;
default:
r = -EINVAL;
@@ -2183,7 +2392,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
{
struct kvm *kvm = filp->private_data;
void __user *argp = (void __user *)arg;
- int r = -EINVAL;
+ int r = -ENOTTY;
/*
* This union makes it completely explicit to gcc-3.x
* that these two variables' stack usage should be
@@ -2245,25 +2454,39 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (r)
goto out;
break;
- case KVM_CREATE_IRQCHIP:
+ case KVM_CREATE_IRQCHIP: {
+ struct kvm_pic *vpic;
+
+ mutex_lock(&kvm->lock);
+ r = -EEXIST;
+ if (kvm->arch.vpic)
+ goto create_irqchip_unlock;
r = -ENOMEM;
- kvm->arch.vpic = kvm_create_pic(kvm);
- if (kvm->arch.vpic) {
+ vpic = kvm_create_pic(kvm);
+ if (vpic) {
r = kvm_ioapic_init(kvm);
if (r) {
- kfree(kvm->arch.vpic);
- kvm->arch.vpic = NULL;
- goto out;
+ kfree(vpic);
+ goto create_irqchip_unlock;
}
} else
- goto out;
+ goto create_irqchip_unlock;
+ smp_wmb();
+ kvm->arch.vpic = vpic;
+ smp_wmb();
r = kvm_setup_default_irq_routing(kvm);
if (r) {
+ mutex_lock(&kvm->irq_lock);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
- goto out;
+ kvm->arch.vpic = NULL;
+ kvm->arch.vioapic = NULL;
+ mutex_unlock(&kvm->irq_lock);
}
+ create_irqchip_unlock:
+ mutex_unlock(&kvm->lock);
break;
+ }
case KVM_CREATE_PIT:
u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
goto create_pit;
@@ -2293,10 +2516,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
if (irqchip_in_kernel(kvm)) {
__s32 status;
- mutex_lock(&kvm->irq_lock);
status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
irq_event.irq, irq_event.level);
- mutex_unlock(&kvm->irq_lock);
if (ioctl == KVM_IRQ_LINE_STATUS) {
irq_event.status = status;
if (copy_to_user(argp, &irq_event,
@@ -2422,6 +2643,55 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
break;
}
+ case KVM_XEN_HVM_CONFIG: {
+ r = -EFAULT;
+ if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
+ sizeof(struct kvm_xen_hvm_config)))
+ goto out;
+ r = -EINVAL;
+ if (kvm->arch.xen_hvm_config.flags)
+ goto out;
+ r = 0;
+ break;
+ }
+ case KVM_SET_CLOCK: {
+ struct timespec now;
+ struct kvm_clock_data user_ns;
+ u64 now_ns;
+ s64 delta;
+
+ r = -EFAULT;
+ if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
+ goto out;
+
+ r = -EINVAL;
+ if (user_ns.flags)
+ goto out;
+
+ r = 0;
+ ktime_get_ts(&now);
+ now_ns = timespec_to_ns(&now);
+ delta = user_ns.clock - now_ns;
+ kvm->arch.kvmclock_offset = delta;
+ break;
+ }
+ case KVM_GET_CLOCK: {
+ struct timespec now;
+ struct kvm_clock_data user_ns;
+ u64 now_ns;
+
+ ktime_get_ts(&now);
+ now_ns = timespec_to_ns(&now);
+ user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
+ user_ns.flags = 0;
+
+ r = -EFAULT;
+ if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
+ goto out;
+ r = 0;
+ break;
+ }
+
default:
;
}
@@ -2434,7 +2704,8 @@ static void kvm_init_msr_list(void)
u32 dummy[2];
unsigned i, j;
- for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
+ /* skip the first msrs in the list. KVM-specific */
+ for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
continue;
if (j < i)
@@ -2758,13 +3029,13 @@ static void cache_all_regs(struct kvm_vcpu *vcpu)
}
int emulate_instruction(struct kvm_vcpu *vcpu,
- struct kvm_run *run,
unsigned long cr2,
u16 error_code,
int emulation_type)
{
int r, shadow_mask;
struct decode_cache *c;
+ struct kvm_run *run = vcpu->run;
kvm_clear_exception_queue(vcpu);
vcpu->arch.mmio_fault_cr2 = cr2;
@@ -2784,7 +3055,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
vcpu->arch.emulate_ctxt.vcpu = vcpu;
- vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
+ vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
vcpu->arch.emulate_ctxt.mode =
(vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
? X86EMUL_MODE_REAL : cs_l
@@ -2862,7 +3133,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
return EMULATE_DO_MMIO;
}
- kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+ kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
if (vcpu->mmio_is_write) {
vcpu->mmio_needed = 0;
@@ -2970,8 +3241,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu)
return r;
}
-int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
- int size, unsigned port)
+int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
{
unsigned long val;
@@ -3000,7 +3270,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio);
-int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
int size, unsigned long count, int down,
gva_t address, int rep, unsigned port)
{
@@ -3073,9 +3343,6 @@ static void bounce_off(void *info)
/* nothing */
}
-static unsigned int ref_freq;
-static unsigned long tsc_khz_ref;
-
static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
@@ -3084,14 +3351,11 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
struct kvm_vcpu *vcpu;
int i, send_ipi = 0;
- if (!ref_freq)
- ref_freq = freq->old;
-
if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
return 0;
if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
return 0;
- per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
+ per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
@@ -3128,9 +3392,28 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = {
.notifier_call = kvmclock_cpufreq_notifier
};
+static void kvm_timer_init(void)
+{
+ int cpu;
+
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ for_each_online_cpu(cpu) {
+ unsigned long khz = cpufreq_get(cpu);
+ if (!khz)
+ khz = tsc_khz;
+ per_cpu(cpu_tsc_khz, cpu) = khz;
+ }
+ } else {
+ for_each_possible_cpu(cpu)
+ per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+ }
+}
+
int kvm_arch_init(void *opaque)
{
- int r, cpu;
+ int r;
struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
if (kvm_x86_ops) {
@@ -3162,13 +3445,7 @@ int kvm_arch_init(void *opaque)
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0);
- for_each_possible_cpu(cpu)
- per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
- if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
- tsc_khz_ref = tsc_khz;
- cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
- }
+ kvm_timer_init();
return 0;
@@ -3296,7 +3573,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
unsigned long *rflags)
{
kvm_lmsw(vcpu, msw);
- *rflags = kvm_x86_ops->get_rflags(vcpu);
+ *rflags = kvm_get_rflags(vcpu);
}
unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
@@ -3334,7 +3611,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
switch (cr) {
case 0:
kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
- *rflags = kvm_x86_ops->get_rflags(vcpu);
+ *rflags = kvm_get_rflags(vcpu);
break;
case 2:
vcpu->arch.cr2 = val;
@@ -3454,18 +3731,18 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
*
* No need to exit to userspace if we already have an interrupt queued.
*/
-static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
- struct kvm_run *kvm_run)
+static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
{
return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
- kvm_run->request_interrupt_window &&
+ vcpu->run->request_interrupt_window &&
kvm_arch_interrupt_allowed(vcpu));
}
-static void post_kvm_run_save(struct kvm_vcpu *vcpu,
- struct kvm_run *kvm_run)
+static void post_kvm_run_save(struct kvm_vcpu *vcpu)
{
- kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
+ struct kvm_run *kvm_run = vcpu->run;
+
+ kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
if (irqchip_in_kernel(vcpu->kvm))
@@ -3526,7 +3803,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
}
-static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void inject_pending_event(struct kvm_vcpu *vcpu)
{
/* try to reinject previous events if any */
if (vcpu->arch.exception.pending) {
@@ -3562,11 +3839,11 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
}
-static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
int r;
bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
- kvm_run->request_interrupt_window;
+ vcpu->run->request_interrupt_window;
if (vcpu->requests)
if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
@@ -3587,12 +3864,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_x86_ops->tlb_flush(vcpu);
if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
&vcpu->requests)) {
- kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS;
+ vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
r = 0;
goto out;
}
if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
- kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
+ vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
r = 0;
goto out;
}
@@ -3616,7 +3893,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
goto out;
}
- inject_pending_event(vcpu, kvm_run);
+ inject_pending_event(vcpu);
/* enable NMI/IRQ window open exits if needed */
if (vcpu->arch.nmi_pending)
@@ -3642,7 +3919,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
trace_kvm_entry(vcpu->vcpu_id);
- kvm_x86_ops->run(vcpu, kvm_run);
+ kvm_x86_ops->run(vcpu);
/*
* If the guest has used debug registers, at least dr7
@@ -3684,13 +3961,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_lapic_sync_from_vapic(vcpu);
- r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
+ r = kvm_x86_ops->handle_exit(vcpu);
out:
return r;
}
-static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int __vcpu_run(struct kvm_vcpu *vcpu)
{
int r;
@@ -3710,7 +3987,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
r = 1;
while (r > 0) {
if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
- r = vcpu_enter_guest(vcpu, kvm_run);
+ r = vcpu_enter_guest(vcpu);
else {
up_read(&vcpu->kvm->slots_lock);
kvm_vcpu_block(vcpu);
@@ -3738,14 +4015,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (kvm_cpu_has_pending_timer(vcpu))
kvm_inject_pending_timer_irqs(vcpu);
- if (dm_request_for_irq_injection(vcpu, kvm_run)) {
+ if (dm_request_for_irq_injection(vcpu)) {
r = -EINTR;
- kvm_run->exit_reason = KVM_EXIT_INTR;
+ vcpu->run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.request_irq_exits;
}
if (signal_pending(current)) {
r = -EINTR;
- kvm_run->exit_reason = KVM_EXIT_INTR;
+ vcpu->run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.signal_exits;
}
if (need_resched()) {
@@ -3756,7 +4033,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
up_read(&vcpu->kvm->slots_lock);
- post_kvm_run_save(vcpu, kvm_run);
+ post_kvm_run_save(vcpu);
vapic_exit(vcpu);
@@ -3789,15 +4066,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (r)
goto out;
}
-#if CONFIG_HAS_IOMEM
if (vcpu->mmio_needed) {
memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
vcpu->mmio_read_completed = 1;
vcpu->mmio_needed = 0;
down_read(&vcpu->kvm->slots_lock);
- r = emulate_instruction(vcpu, kvm_run,
- vcpu->arch.mmio_fault_cr2, 0,
+ r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0,
EMULTYPE_NO_DECODE);
up_read(&vcpu->kvm->slots_lock);
if (r == EMULATE_DO_MMIO) {
@@ -3808,12 +4083,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
goto out;
}
}
-#endif
if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
kvm_register_write(vcpu, VCPU_REGS_RAX,
kvm_run->hypercall.ret);
- r = __vcpu_run(vcpu, kvm_run);
+ r = __vcpu_run(vcpu);
out:
if (vcpu->sigset_active)
@@ -3847,13 +4121,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
#endif
regs->rip = kvm_rip_read(vcpu);
- regs->rflags = kvm_x86_ops->get_rflags(vcpu);
-
- /*
- * Don't leak debug flags in case they were set for guest debugging
- */
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+ regs->rflags = kvm_get_rflags(vcpu);
vcpu_put(vcpu);
@@ -3881,12 +4149,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
-
#endif
kvm_rip_write(vcpu, regs->rip);
- kvm_x86_ops->set_rflags(vcpu, regs->rflags);
-
+ kvm_set_rflags(vcpu, regs->rflags);
vcpu->arch.exception.pending = false;
@@ -4105,7 +4371,7 @@ static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
{
return (seg != VCPU_SREG_LDTR) &&
(seg != VCPU_SREG_TR) &&
- (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_VM);
+ (kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
}
int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
@@ -4133,7 +4399,7 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
{
tss->cr3 = vcpu->arch.cr3;
tss->eip = kvm_rip_read(vcpu);
- tss->eflags = kvm_x86_ops->get_rflags(vcpu);
+ tss->eflags = kvm_get_rflags(vcpu);
tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
@@ -4157,7 +4423,7 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu,
kvm_set_cr3(vcpu, tss->cr3);
kvm_rip_write(vcpu, tss->eip);
- kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2);
+ kvm_set_rflags(vcpu, tss->eflags | 2);
kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
@@ -4195,7 +4461,7 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu,
struct tss_segment_16 *tss)
{
tss->ip = kvm_rip_read(vcpu);
- tss->flag = kvm_x86_ops->get_rflags(vcpu);
+ tss->flag = kvm_get_rflags(vcpu);
tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
@@ -4210,14 +4476,13 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu,
tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
- tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
}
static int load_state_from_tss16(struct kvm_vcpu *vcpu,
struct tss_segment_16 *tss)
{
kvm_rip_write(vcpu, tss->ip);
- kvm_x86_ops->set_rflags(vcpu, tss->flag | 2);
+ kvm_set_rflags(vcpu, tss->flag | 2);
kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
@@ -4363,8 +4628,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
}
if (reason == TASK_SWITCH_IRET) {
- u32 eflags = kvm_x86_ops->get_rflags(vcpu);
- kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
+ u32 eflags = kvm_get_rflags(vcpu);
+ kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
}
/* set back link to prev task only if NT bit is set in eflags
@@ -4372,11 +4637,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
old_tss_sel = 0xffff;
- /* set back link to prev task only if NT bit is set in eflags
- note that old_tss_sel is not used afetr this point */
- if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
- old_tss_sel = 0xffff;
-
if (nseg_desc.type & 8)
ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel,
old_tss_base, &nseg_desc);
@@ -4385,8 +4645,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
old_tss_base, &nseg_desc);
if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
- u32 eflags = kvm_x86_ops->get_rflags(vcpu);
- kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT);
+ u32 eflags = kvm_get_rflags(vcpu);
+ kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT);
}
if (reason != TASK_SWITCH_IRET) {
@@ -4438,8 +4698,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4;
kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
- if (!is_long_mode(vcpu) && is_pae(vcpu))
+ if (!is_long_mode(vcpu) && is_pae(vcpu)) {
load_pdptrs(vcpu, vcpu->arch.cr3);
+ mmu_reset_needed = 1;
+ }
if (mmu_reset_needed)
kvm_mmu_reset_context(vcpu);
@@ -4480,12 +4742,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
+ unsigned long rflags;
int i, r;
vcpu_load(vcpu);
- if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
- (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
+ if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
+ r = -EBUSY;
+ if (vcpu->arch.exception.pending)
+ goto unlock_out;
+ if (dbg->control & KVM_GUESTDBG_INJECT_DB)
+ kvm_queue_exception(vcpu, DB_VECTOR);
+ else
+ kvm_queue_exception(vcpu, BP_VECTOR);
+ }
+
+ /*
+ * Read rflags as long as potentially injected trace flags are still
+ * filtered out.
+ */
+ rflags = kvm_get_rflags(vcpu);
+
+ vcpu->guest_debug = dbg->control;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
+ vcpu->guest_debug = 0;
+
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
for (i = 0; i < KVM_NR_DB_REGS; ++i)
vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
vcpu->arch.switch_db_regs =
@@ -4496,13 +4778,23 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
}
- r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ vcpu->arch.singlestep_cs =
+ get_segment_selector(vcpu, VCPU_SREG_CS);
+ vcpu->arch.singlestep_rip = kvm_rip_read(vcpu);
+ }
+
+ /*
+ * Trigger an rflags update that will inject or remove the trace
+ * flags.
+ */
+ kvm_set_rflags(vcpu, rflags);
+
+ kvm_x86_ops->set_guest_debug(vcpu, dbg);
- if (dbg->control & KVM_GUESTDBG_INJECT_DB)
- kvm_queue_exception(vcpu, DB_VECTOR);
- else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
- kvm_queue_exception(vcpu, BP_VECTOR);
+ r = 0;
+unlock_out:
vcpu_put(vcpu);
return r;
@@ -4703,14 +4995,26 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
return kvm_x86_ops->vcpu_reset(vcpu);
}
-void kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void *garbage)
{
- kvm_x86_ops->hardware_enable(garbage);
+ /*
+ * Since this may be called from a hotplug notifcation,
+ * we can't get the CPU frequency directly.
+ */
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ int cpu = raw_smp_processor_id();
+ per_cpu(cpu_tsc_khz, cpu) = 0;
+ }
+
+ kvm_shared_msr_cpu_online();
+
+ return kvm_x86_ops->hardware_enable(garbage);
}
void kvm_arch_hardware_disable(void *garbage)
{
kvm_x86_ops->hardware_disable(garbage);
+ drop_user_return_notifiers(garbage);
}
int kvm_arch_hardware_setup(void)
@@ -4948,8 +5252,36 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
return kvm_x86_ops->interrupt_allowed(vcpu);
}
+unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
+{
+ unsigned long rflags;
+
+ rflags = kvm_x86_ops->get_rflags(vcpu);
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF);
+ return rflags;
+}
+EXPORT_SYMBOL_GPL(kvm_get_rflags);
+
+void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
+{
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
+ vcpu->arch.singlestep_cs ==
+ get_segment_selector(vcpu, VCPU_SREG_CS) &&
+ vcpu->arch.singlestep_rip == kvm_rip_read(vcpu))
+ rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+ kvm_x86_ops->set_rflags(vcpu, rflags);
+}
+EXPORT_SYMBOL_GPL(kvm_set_rflags);
+
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index a2d6472895fb..cffd754f3039 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -5,7 +5,7 @@
inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
quiet_cmd_inat_tables = GEN $@
- cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@
+ cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@
$(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
$(call cmd,inat_tables)
@@ -14,15 +14,15 @@ $(obj)/inat.o: $(obj)/inat-tables.c
clean-files := inat-tables.c
-obj-$(CONFIG_SMP) := msr.o
+obj-$(CONFIG_SMP) += msr-smp.o
lib-y := delay.o
lib-y += thunk_$(BITS).o
lib-y += usercopy_$(BITS).o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
-lib-y += insn.o inat.o
+lib-$(CONFIG_KPROBES) += insn.o inat.o
-obj-y += msr-reg.o msr-reg-export.o
+obj-y += msr.o msr-reg.o msr-reg-export.o
ifeq ($(CONFIG_X86_32),y)
obj-y += atomic64_32.o
diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c
new file mode 100644
index 000000000000..a6b1b86d2253
--- /dev/null
+++ b/arch/x86/lib/msr-smp.c
@@ -0,0 +1,204 @@
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/smp.h>
+#include <asm/msr.h>
+
+static void __rdmsr_on_cpu(void *info)
+{
+ struct msr_info *rv = info;
+ struct msr *reg;
+ int this_cpu = raw_smp_processor_id();
+
+ if (rv->msrs)
+ reg = per_cpu_ptr(rv->msrs, this_cpu);
+ else
+ reg = &rv->reg;
+
+ rdmsr(rv->msr_no, reg->l, reg->h);
+}
+
+static void __wrmsr_on_cpu(void *info)
+{
+ struct msr_info *rv = info;
+ struct msr *reg;
+ int this_cpu = raw_smp_processor_id();
+
+ if (rv->msrs)
+ reg = per_cpu_ptr(rv->msrs, this_cpu);
+ else
+ reg = &rv->reg;
+
+ wrmsr(rv->msr_no, reg->l, reg->h);
+}
+
+int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+{
+ int err;
+ struct msr_info rv;
+
+ memset(&rv, 0, sizeof(rv));
+
+ rv.msr_no = msr_no;
+ err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
+ *l = rv.reg.l;
+ *h = rv.reg.h;
+
+ return err;
+}
+EXPORT_SYMBOL(rdmsr_on_cpu);
+
+int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+ int err;
+ struct msr_info rv;
+
+ memset(&rv, 0, sizeof(rv));
+
+ rv.msr_no = msr_no;
+ rv.reg.l = l;
+ rv.reg.h = h;
+ err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
+
+ return err;
+}
+EXPORT_SYMBOL(wrmsr_on_cpu);
+
+static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no,
+ struct msr *msrs,
+ void (*msr_func) (void *info))
+{
+ struct msr_info rv;
+ int this_cpu;
+
+ memset(&rv, 0, sizeof(rv));
+
+ rv.msrs = msrs;
+ rv.msr_no = msr_no;
+
+ this_cpu = get_cpu();
+
+ if (cpumask_test_cpu(this_cpu, mask))
+ msr_func(&rv);
+
+ smp_call_function_many(mask, msr_func, &rv, 1);
+ put_cpu();
+}
+
+/* rdmsr on a bunch of CPUs
+ *
+ * @mask: which CPUs
+ * @msr_no: which MSR
+ * @msrs: array of MSR values
+ *
+ */
+void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
+{
+ __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu);
+}
+EXPORT_SYMBOL(rdmsr_on_cpus);
+
+/*
+ * wrmsr on a bunch of CPUs
+ *
+ * @mask: which CPUs
+ * @msr_no: which MSR
+ * @msrs: array of MSR values
+ *
+ */
+void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
+{
+ __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu);
+}
+EXPORT_SYMBOL(wrmsr_on_cpus);
+
+/* These "safe" variants are slower and should be used when the target MSR
+ may not actually exist. */
+static void __rdmsr_safe_on_cpu(void *info)
+{
+ struct msr_info *rv = info;
+
+ rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h);
+}
+
+static void __wrmsr_safe_on_cpu(void *info)
+{
+ struct msr_info *rv = info;
+
+ rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h);
+}
+
+int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+{
+ int err;
+ struct msr_info rv;
+
+ memset(&rv, 0, sizeof(rv));
+
+ rv.msr_no = msr_no;
+ err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
+ *l = rv.reg.l;
+ *h = rv.reg.h;
+
+ return err ? err : rv.err;
+}
+EXPORT_SYMBOL(rdmsr_safe_on_cpu);
+
+int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+ int err;
+ struct msr_info rv;
+
+ memset(&rv, 0, sizeof(rv));
+
+ rv.msr_no = msr_no;
+ rv.reg.l = l;
+ rv.reg.h = h;
+ err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
+
+ return err ? err : rv.err;
+}
+EXPORT_SYMBOL(wrmsr_safe_on_cpu);
+
+/*
+ * These variants are significantly slower, but allows control over
+ * the entire 32-bit GPR set.
+ */
+static void __rdmsr_safe_regs_on_cpu(void *info)
+{
+ struct msr_regs_info *rv = info;
+
+ rv->err = rdmsr_safe_regs(rv->regs);
+}
+
+static void __wrmsr_safe_regs_on_cpu(void *info)
+{
+ struct msr_regs_info *rv = info;
+
+ rv->err = wrmsr_safe_regs(rv->regs);
+}
+
+int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
+{
+ int err;
+ struct msr_regs_info rv;
+
+ rv.regs = regs;
+ rv.err = -EIO;
+ err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1);
+
+ return err ? err : rv.err;
+}
+EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu);
+
+int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
+{
+ int err;
+ struct msr_regs_info rv;
+
+ rv.regs = regs;
+ rv.err = -EIO;
+ err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1);
+
+ return err ? err : rv.err;
+}
+EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu);
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index 41628b104b9e..8f8eebdca7d4 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -1,218 +1,23 @@
#include <linux/module.h>
#include <linux/preempt.h>
-#include <linux/smp.h>
#include <asm/msr.h>
-struct msr_info {
- u32 msr_no;
- struct msr reg;
- struct msr *msrs;
- int off;
- int err;
-};
-
-static void __rdmsr_on_cpu(void *info)
-{
- struct msr_info *rv = info;
- struct msr *reg;
- int this_cpu = raw_smp_processor_id();
-
- if (rv->msrs)
- reg = &rv->msrs[this_cpu - rv->off];
- else
- reg = &rv->reg;
-
- rdmsr(rv->msr_no, reg->l, reg->h);
-}
-
-static void __wrmsr_on_cpu(void *info)
-{
- struct msr_info *rv = info;
- struct msr *reg;
- int this_cpu = raw_smp_processor_id();
-
- if (rv->msrs)
- reg = &rv->msrs[this_cpu - rv->off];
- else
- reg = &rv->reg;
-
- wrmsr(rv->msr_no, reg->l, reg->h);
-}
-
-int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
- int err;
- struct msr_info rv;
-
- memset(&rv, 0, sizeof(rv));
-
- rv.msr_no = msr_no;
- err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
- *l = rv.reg.l;
- *h = rv.reg.h;
-
- return err;
-}
-EXPORT_SYMBOL(rdmsr_on_cpu);
-
-int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
- int err;
- struct msr_info rv;
-
- memset(&rv, 0, sizeof(rv));
-
- rv.msr_no = msr_no;
- rv.reg.l = l;
- rv.reg.h = h;
- err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
-
- return err;
-}
-EXPORT_SYMBOL(wrmsr_on_cpu);
-
-static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no,
- struct msr *msrs,
- void (*msr_func) (void *info))
-{
- struct msr_info rv;
- int this_cpu;
-
- memset(&rv, 0, sizeof(rv));
-
- rv.off = cpumask_first(mask);
- rv.msrs = msrs;
- rv.msr_no = msr_no;
-
- this_cpu = get_cpu();
-
- if (cpumask_test_cpu(this_cpu, mask))
- msr_func(&rv);
-
- smp_call_function_many(mask, msr_func, &rv, 1);
- put_cpu();
-}
-
-/* rdmsr on a bunch of CPUs
- *
- * @mask: which CPUs
- * @msr_no: which MSR
- * @msrs: array of MSR values
- *
- */
-void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
-{
- __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu);
-}
-EXPORT_SYMBOL(rdmsr_on_cpus);
-
-/*
- * wrmsr on a bunch of CPUs
- *
- * @mask: which CPUs
- * @msr_no: which MSR
- * @msrs: array of MSR values
- *
- */
-void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
+struct msr *msrs_alloc(void)
{
- __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu);
-}
-EXPORT_SYMBOL(wrmsr_on_cpus);
-
-/* These "safe" variants are slower and should be used when the target MSR
- may not actually exist. */
-static void __rdmsr_safe_on_cpu(void *info)
-{
- struct msr_info *rv = info;
-
- rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h);
-}
-
-static void __wrmsr_safe_on_cpu(void *info)
-{
- struct msr_info *rv = info;
-
- rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h);
-}
-
-int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
- int err;
- struct msr_info rv;
-
- memset(&rv, 0, sizeof(rv));
-
- rv.msr_no = msr_no;
- err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
- *l = rv.reg.l;
- *h = rv.reg.h;
-
- return err ? err : rv.err;
-}
-EXPORT_SYMBOL(rdmsr_safe_on_cpu);
-
-int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
- int err;
- struct msr_info rv;
+ struct msr *msrs = NULL;
- memset(&rv, 0, sizeof(rv));
+ msrs = alloc_percpu(struct msr);
+ if (!msrs) {
+ pr_warning("%s: error allocating msrs\n", __func__);
+ return NULL;
+ }
- rv.msr_no = msr_no;
- rv.reg.l = l;
- rv.reg.h = h;
- err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
-
- return err ? err : rv.err;
+ return msrs;
}
-EXPORT_SYMBOL(wrmsr_safe_on_cpu);
-
-/*
- * These variants are significantly slower, but allows control over
- * the entire 32-bit GPR set.
- */
-struct msr_regs_info {
- u32 *regs;
- int err;
-};
+EXPORT_SYMBOL(msrs_alloc);
-static void __rdmsr_safe_regs_on_cpu(void *info)
+void msrs_free(struct msr *msrs)
{
- struct msr_regs_info *rv = info;
-
- rv->err = rdmsr_safe_regs(rv->regs);
-}
-
-static void __wrmsr_safe_regs_on_cpu(void *info)
-{
- struct msr_regs_info *rv = info;
-
- rv->err = wrmsr_safe_regs(rv->regs);
-}
-
-int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
-{
- int err;
- struct msr_regs_info rv;
-
- rv.regs = regs;
- rv.err = -EIO;
- err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1);
-
- return err ? err : rv.err;
-}
-EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu);
-
-int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
-{
- int err;
- struct msr_regs_info rv;
-
- rv.regs = regs;
- rv.err = -EIO;
- err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1);
-
- return err ? err : rv.err;
+ free_percpu(msrs);
}
-EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu);
+EXPORT_SYMBOL(msrs_free);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 73ffd5536f62..d406c5239019 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -146,10 +146,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
use_gbpages = direct_gbpages;
#endif
- set_nx();
- if (nx_enabled)
- printk(KERN_INFO "NX (Execute Disable) protection: active\n");
-
/* Enable PSE if available */
if (cpu_has_pse)
set_in_cr4(X86_CR4_PSE);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 30938c1d8d5d..c973f8e2a6cf 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -412,7 +412,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
pkmap_page_table = pte;
}
-static void __init add_one_highpage_init(struct page *page, int pfn)
+static void __init add_one_highpage_init(struct page *page)
{
ClearPageReserved(page);
init_page_count(page);
@@ -445,7 +445,7 @@ static int __init add_highpages_work_fn(unsigned long start_pfn,
if (!pfn_valid(node_pfn))
continue;
page = pfn_to_page(node_pfn);
- add_one_highpage_init(page, node_pfn);
+ add_one_highpage_init(page);
}
return 0;
@@ -703,8 +703,8 @@ void __init find_low_pfn_range(void)
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init initmem_init(unsigned long start_pfn,
- unsigned long end_pfn)
+void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
+ int acpi, int k8)
{
#ifdef CONFIG_HIGHMEM
highstart_pfn = highend_pfn = max_pfn;
@@ -997,7 +997,7 @@ static noinline int do_test_wp_bit(void)
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);
-static int kernel_set_to_readonly;
+int kernel_set_to_readonly __read_mostly;
void set_kernel_text_rw(void)
{
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5a4398a6006b..5198b9bb34ef 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -568,7 +568,8 @@ kernel_physical_mapping_init(unsigned long start,
}
#ifndef CONFIG_NUMA
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
+void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
+ int acpi, int k8)
{
unsigned long bootmap_size, bootmap;
@@ -694,12 +695,12 @@ void __init mem_init(void)
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);
-static int kernel_set_to_readonly;
+int kernel_set_to_readonly;
void set_kernel_text_rw(void)
{
- unsigned long start = PFN_ALIGN(_stext);
- unsigned long end = PFN_ALIGN(__start_rodata);
+ unsigned long start = PFN_ALIGN(_text);
+ unsigned long end = PFN_ALIGN(__stop___ex_table);
if (!kernel_set_to_readonly)
return;
@@ -707,13 +708,18 @@ void set_kernel_text_rw(void)
pr_debug("Set kernel text: %lx - %lx for read write\n",
start, end);
+ /*
+ * Make the kernel identity mapping for text RW. Kernel text
+ * mapping will always be RO. Refer to the comment in
+ * static_protections() in pageattr.c
+ */
set_memory_rw(start, (end - start) >> PAGE_SHIFT);
}
void set_kernel_text_ro(void)
{
- unsigned long start = PFN_ALIGN(_stext);
- unsigned long end = PFN_ALIGN(__start_rodata);
+ unsigned long start = PFN_ALIGN(_text);
+ unsigned long end = PFN_ALIGN(__stop___ex_table);
if (!kernel_set_to_readonly)
return;
@@ -721,14 +727,21 @@ void set_kernel_text_ro(void)
pr_debug("Set kernel text: %lx - %lx for read only\n",
start, end);
+ /*
+ * Set the kernel identity mapping for text RO.
+ */
set_memory_ro(start, (end - start) >> PAGE_SHIFT);
}
void mark_rodata_ro(void)
{
- unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
+ unsigned long start = PFN_ALIGN(_text);
unsigned long rodata_start =
((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+ unsigned long end = (unsigned long) &__end_rodata_hpage_align;
+ unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table);
+ unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata);
+ unsigned long data_start = (unsigned long) &_sdata;
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
(end - start) >> 10);
@@ -751,6 +764,14 @@ void mark_rodata_ro(void)
printk(KERN_INFO "Testing CPA: again\n");
set_memory_ro(start, (end-start) >> PAGE_SHIFT);
#endif
+
+ free_init_pages("unused kernel memory",
+ (unsigned long) page_address(virt_to_page(text_end)),
+ (unsigned long)
+ page_address(virt_to_page(rodata_start)));
+ free_init_pages("unused kernel memory",
+ (unsigned long) page_address(virt_to_page(rodata_end)),
+ (unsigned long) page_address(virt_to_page(data_start)));
}
#endif
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 2feb9bdedaaf..c246d259822d 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -281,30 +281,6 @@ void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
}
EXPORT_SYMBOL(ioremap_cache);
-static void __iomem *ioremap_default(resource_size_t phys_addr,
- unsigned long size)
-{
- unsigned long flags;
- void __iomem *ret;
- int err;
-
- /*
- * - WB for WB-able memory and no other conflicting mappings
- * - UC_MINUS for non-WB-able memory with no other conflicting mappings
- * - Inherit from confliting mappings otherwise
- */
- err = reserve_memtype(phys_addr, phys_addr + size,
- _PAGE_CACHE_WB, &flags);
- if (err < 0)
- return NULL;
-
- ret = __ioremap_caller(phys_addr, size, flags,
- __builtin_return_address(0));
-
- free_memtype(phys_addr, phys_addr + size);
- return ret;
-}
-
void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
unsigned long prot_val)
{
@@ -380,7 +356,7 @@ void *xlate_dev_mem_ptr(unsigned long phys)
if (page_is_ram(start >> PAGE_SHIFT))
return __va(phys);
- addr = (void __force *)ioremap_default(start, PAGE_SIZE);
+ addr = (void __force *)ioremap_cache(start, PAGE_SIZE);
if (addr)
addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 268f8255280f..970ed579d4e4 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -24,6 +24,9 @@
#include <asm/apic.h>
#include <asm/k8.h>
+static struct bootnode __initdata nodes[8];
+static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
+
static __init int find_northbridge(void)
{
int num;
@@ -54,18 +57,6 @@ static __init void early_get_boot_cpu_id(void)
* need to get boot_cpu_id so can use that to create apicid_to_node
* in k8_scan_nodes()
*/
- /*
- * Find possible boot-time SMP configuration:
- */
-#ifdef CONFIG_X86_MPPARSE
- early_find_smp_config();
-#endif
-#ifdef CONFIG_ACPI
- /*
- * Read APIC information from ACPI tables.
- */
- early_acpi_boot_init();
-#endif
#ifdef CONFIG_X86_MPPARSE
/*
* get boot-time SMP configuration:
@@ -76,12 +67,26 @@ static __init void early_get_boot_cpu_id(void)
early_init_lapic_mapping();
}
-int __init k8_scan_nodes(unsigned long start, unsigned long end)
+int __init k8_get_nodes(struct bootnode *physnodes)
{
- unsigned numnodes, cores, bits, apicid_base;
+ int i;
+ int ret = 0;
+
+ for_each_node_mask(i, nodes_parsed) {
+ physnodes[ret].start = nodes[i].start;
+ physnodes[ret].end = nodes[i].end;
+ ret++;
+ }
+ return ret;
+}
+
+int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn)
+{
+ unsigned long start = PFN_PHYS(start_pfn);
+ unsigned long end = PFN_PHYS(end_pfn);
+ unsigned numnodes;
unsigned long prevbase;
- struct bootnode nodes[8];
- int i, j, nb, found = 0;
+ int i, nb, found = 0;
u32 nodeid, reg;
if (!early_pci_allowed())
@@ -91,16 +96,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
if (nb < 0)
return nb;
- printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb);
+ pr_info("Scanning NUMA topology in Northbridge %d\n", nb);
reg = read_pci_config(0, nb, 0, 0x60);
numnodes = ((reg >> 4) & 0xF) + 1;
if (numnodes <= 1)
return -1;
- printk(KERN_INFO "Number of nodes %d\n", numnodes);
+ pr_info("Number of physical nodes %d\n", numnodes);
- memset(&nodes, 0, sizeof(nodes));
prevbase = 0;
for (i = 0; i < 8; i++) {
unsigned long base, limit;
@@ -111,28 +115,28 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
nodeid = limit & 7;
if ((base & 3) == 0) {
if (i < numnodes)
- printk("Skipping disabled node %d\n", i);
+ pr_info("Skipping disabled node %d\n", i);
continue;
}
if (nodeid >= numnodes) {
- printk("Ignoring excess node %d (%lx:%lx)\n", nodeid,
- base, limit);
+ pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid,
+ base, limit);
continue;
}
if (!limit) {
- printk(KERN_INFO "Skipping node entry %d (base %lx)\n",
- i, base);
+ pr_info("Skipping node entry %d (base %lx)\n",
+ i, base);
continue;
}
if ((base >> 8) & 3 || (limit >> 8) & 3) {
- printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n",
- nodeid, (base>>8)&3, (limit>>8) & 3);
+ pr_err("Node %d using interleaving mode %lx/%lx\n",
+ nodeid, (base >> 8) & 3, (limit >> 8) & 3);
return -1;
}
- if (node_isset(nodeid, node_possible_map)) {
- printk(KERN_INFO "Node %d already present. Skipping\n",
- nodeid);
+ if (node_isset(nodeid, nodes_parsed)) {
+ pr_info("Node %d already present, skipping\n",
+ nodeid);
continue;
}
@@ -141,8 +145,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
limit |= (1<<24)-1;
limit++;
- if (limit > max_pfn << PAGE_SHIFT)
- limit = max_pfn << PAGE_SHIFT;
+ if (limit > end)
+ limit = end;
if (limit <= base)
continue;
@@ -154,24 +158,24 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
if (limit > end)
limit = end;
if (limit == base) {
- printk(KERN_ERR "Empty node %d\n", nodeid);
+ pr_err("Empty node %d\n", nodeid);
continue;
}
if (limit < base) {
- printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n",
+ pr_err("Node %d bogus settings %lx-%lx.\n",
nodeid, base, limit);
continue;
}
/* Could sort here, but pun for now. Should not happen anyroads. */
if (prevbase > base) {
- printk(KERN_ERR "Node map not sorted %lx,%lx\n",
+ pr_err("Node map not sorted %lx,%lx\n",
prevbase, base);
return -1;
}
- printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n",
- nodeid, base, limit);
+ pr_info("Node %d MemBase %016lx Limit %016lx\n",
+ nodeid, base, limit);
found++;
@@ -180,18 +184,29 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
prevbase = base;
- node_set(nodeid, node_possible_map);
+ node_set(nodeid, nodes_parsed);
}
if (!found)
return -1;
+ return 0;
+}
+
+int __init k8_scan_nodes(void)
+{
+ unsigned int bits;
+ unsigned int cores;
+ unsigned int apicid_base;
+ int i;
+ BUG_ON(nodes_empty(nodes_parsed));
+ node_possible_map = nodes_parsed;
memnode_shift = compute_hash_shift(nodes, 8, NULL);
if (memnode_shift < 0) {
- printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n");
+ pr_err("No NUMA node hash function found. Contact maintainer\n");
return -1;
}
- printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift);
+ pr_info("Using node hash shift of %d\n", memnode_shift);
/* use the coreid bits from early_identify_cpu */
bits = boot_cpu_data.x86_coreid_bits;
@@ -200,14 +215,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
/* need to get boot_cpu_id early for system with apicid lifting */
early_get_boot_cpu_id();
if (boot_cpu_physical_apicid > 0) {
- printk(KERN_INFO "BSP APIC ID: %02x\n",
- boot_cpu_physical_apicid);
+ pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
apicid_base = boot_cpu_physical_apicid;
}
- for (i = 0; i < 8; i++) {
- if (nodes[i].start == nodes[i].end)
- continue;
+ for_each_node_mask(i, node_possible_map) {
+ int j;
e820_register_active_regions(i,
nodes[i].start >> PAGE_SHIFT,
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 11a4ad4d6253..c0f6198565eb 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -5,6 +5,8 @@
* 2008 Pekka Paalanen <pq@iki.fi>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
@@ -136,7 +138,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
pte_t *pte = lookup_address(f->page, &level);
if (!pte) {
- pr_err("kmmio: no pte for page 0x%08lx\n", f->page);
+ pr_err("no pte for page 0x%08lx\n", f->page);
return -1;
}
@@ -148,7 +150,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
clear_pte_presence(pte, clear, &f->old_presence);
break;
default:
- pr_err("kmmio: unexpected page level 0x%x.\n", level);
+ pr_err("unexpected page level 0x%x.\n", level);
return -1;
}
@@ -170,13 +172,14 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
{
int ret;
- WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
+ WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
if (f->armed) {
- pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
- f->page, f->count, !!f->old_presence);
+ pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n",
+ f->page, f->count, !!f->old_presence);
}
ret = clear_page_presence(f, true);
- WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
+ WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"),
+ f->page);
f->armed = true;
return ret;
}
@@ -203,7 +206,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
*/
/*
* Interrupts are disabled on entry as trap3 is an interrupt gate
- * and they remain disabled thorough out this function.
+ * and they remain disabled throughout this function.
*/
int kmmio_handler(struct pt_regs *regs, unsigned long addr)
{
@@ -240,24 +243,21 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
* condition needs handling by do_page_fault(), the
* page really not being present is the most common.
*/
- pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n",
- addr, smp_processor_id());
+ pr_debug("secondary hit for 0x%08lx CPU %d.\n",
+ addr, smp_processor_id());
if (!faultpage->old_presence)
- pr_info("kmmio: unexpected secondary hit for "
- "address 0x%08lx on CPU %d.\n", addr,
- smp_processor_id());
+ pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n",
+ addr, smp_processor_id());
} else {
/*
* Prevent overwriting already in-flight context.
* This should not happen, let's hope disarming at
* least prevents a panic.
*/
- pr_emerg("kmmio: recursive probe hit on CPU %d, "
- "for address 0x%08lx. Ignoring.\n",
- smp_processor_id(), addr);
- pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
- ctx->addr);
+ pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. Ignoring.\n",
+ smp_processor_id(), addr);
+ pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr);
disarm_kmmio_fault_page(faultpage);
}
goto no_kmmio_ctx;
@@ -302,7 +302,7 @@ no_kmmio:
/*
* Interrupts are disabled on entry as trap1 is an interrupt gate
- * and they remain disabled thorough out this function.
+ * and they remain disabled throughout this function.
* This must always get called as the pair to kmmio_handler().
*/
static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
@@ -316,8 +316,8 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
* something external causing them (f.e. using a debugger while
* mmio tracing enabled), or erroneous behaviour
*/
- pr_warning("kmmio: unexpected debug trap on CPU %d.\n",
- smp_processor_id());
+ pr_warning("unexpected debug trap on CPU %d.\n",
+ smp_processor_id());
goto out;
}
@@ -425,7 +425,7 @@ int register_kmmio_probe(struct kmmio_probe *p)
list_add_rcu(&p->list, &kmmio_probes);
while (size < size_lim) {
if (add_kmmio_fault_page(p->addr + size))
- pr_err("kmmio: Unable to set page fault.\n");
+ pr_err("Unable to set page fault.\n");
size += PAGE_SIZE;
}
out:
@@ -490,7 +490,7 @@ static void remove_kmmio_fault_pages(struct rcu_head *head)
* 2. remove_kmmio_fault_pages()
* Remove the pages from kmmio_page_table.
* 3. rcu_free_kmmio_fault_pages()
- * Actally free the kmmio_fault_page structs as with RCU.
+ * Actually free the kmmio_fault_page structs as with RCU.
*/
void unregister_kmmio_probe(struct kmmio_probe *p)
{
@@ -511,7 +511,7 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
if (!drelease) {
- pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
+ pr_crit("leaking kmmio_fault_page objects.\n");
return;
}
drelease->release_list = release_list;
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 132772a8ec57..34a3291ca103 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -19,6 +19,9 @@
*
* Derived from the read-mod example from relay-examples by Tom Zanussi.
*/
+
+#define pr_fmt(fmt) "mmiotrace: " fmt
+
#define DEBUG 1
#include <linux/module.h>
@@ -36,8 +39,6 @@
#include "pf_in.h"
-#define NAME "mmiotrace: "
-
struct trap_reason {
unsigned long addr;
unsigned long ip;
@@ -96,17 +97,18 @@ static void print_pte(unsigned long address)
pte_t *pte = lookup_address(address, &level);
if (!pte) {
- pr_err(NAME "Error in %s: no pte for page 0x%08lx\n",
- __func__, address);
+ pr_err("Error in %s: no pte for page 0x%08lx\n",
+ __func__, address);
return;
}
if (level == PG_LEVEL_2M) {
- pr_emerg(NAME "4MB pages are not currently supported: "
- "0x%08lx\n", address);
+ pr_emerg("4MB pages are not currently supported: 0x%08lx\n",
+ address);
BUG();
}
- pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address,
+ pr_info("pte for 0x%lx: 0x%llx 0x%llx\n",
+ address,
(unsigned long long)pte_val(*pte),
(unsigned long long)pte_val(*pte) & _PAGE_PRESENT);
}
@@ -118,22 +120,21 @@ static void print_pte(unsigned long address)
static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
{
const struct trap_reason *my_reason = &get_cpu_var(pf_reason);
- pr_emerg(NAME "unexpected fault for address: 0x%08lx, "
- "last fault for address: 0x%08lx\n",
- addr, my_reason->addr);
+ pr_emerg("unexpected fault for address: 0x%08lx, last fault for address: 0x%08lx\n",
+ addr, my_reason->addr);
print_pte(addr);
print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip);
print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip);
#ifdef __i386__
pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
- regs->ax, regs->bx, regs->cx, regs->dx);
+ regs->ax, regs->bx, regs->cx, regs->dx);
pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
- regs->si, regs->di, regs->bp, regs->sp);
+ regs->si, regs->di, regs->bp, regs->sp);
#else
pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n",
- regs->ax, regs->cx, regs->dx);
+ regs->ax, regs->cx, regs->dx);
pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n",
- regs->si, regs->di, regs->bp, regs->sp);
+ regs->si, regs->di, regs->bp, regs->sp);
#endif
put_cpu_var(pf_reason);
BUG();
@@ -213,7 +214,7 @@ static void post(struct kmmio_probe *p, unsigned long condition,
/* this should always return the active_trace count to 0 */
my_reason->active_traces--;
if (my_reason->active_traces) {
- pr_emerg(NAME "unexpected post handler");
+ pr_emerg("unexpected post handler");
BUG();
}
@@ -244,7 +245,7 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size,
};
if (!trace) {
- pr_err(NAME "kmalloc failed in ioremap\n");
+ pr_err("kmalloc failed in ioremap\n");
return;
}
@@ -282,8 +283,8 @@ void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
if (!is_enabled()) /* recheck and proper locking in *_core() */
return;
- pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n",
- (unsigned long long)offset, size, addr);
+ pr_debug("ioremap_*(0x%llx, 0x%lx) = %p\n",
+ (unsigned long long)offset, size, addr);
if ((filter_offset) && (offset != filter_offset))
return;
ioremap_trace_core(offset, size, addr);
@@ -301,7 +302,7 @@ static void iounmap_trace_core(volatile void __iomem *addr)
struct remap_trace *tmp;
struct remap_trace *found_trace = NULL;
- pr_debug(NAME "Unmapping %p.\n", addr);
+ pr_debug("Unmapping %p.\n", addr);
spin_lock_irq(&trace_lock);
if (!is_enabled())
@@ -363,9 +364,8 @@ static void clear_trace_list(void)
* Caller also ensures is_enabled() cannot change.
*/
list_for_each_entry(trace, &trace_list, list) {
- pr_notice(NAME "purging non-iounmapped "
- "trace @0x%08lx, size 0x%lx.\n",
- trace->probe.addr, trace->probe.len);
+ pr_notice("purging non-iounmapped trace @0x%08lx, size 0x%lx.\n",
+ trace->probe.addr, trace->probe.len);
if (!nommiotrace)
unregister_kmmio_probe(&trace->probe);
}
@@ -387,7 +387,7 @@ static void enter_uniprocessor(void)
if (downed_cpus == NULL &&
!alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) {
- pr_notice(NAME "Failed to allocate mask\n");
+ pr_notice("Failed to allocate mask\n");
goto out;
}
@@ -395,20 +395,19 @@ static void enter_uniprocessor(void)
cpumask_copy(downed_cpus, cpu_online_mask);
cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus);
if (num_online_cpus() > 1)
- pr_notice(NAME "Disabling non-boot CPUs...\n");
+ pr_notice("Disabling non-boot CPUs...\n");
put_online_cpus();
for_each_cpu(cpu, downed_cpus) {
err = cpu_down(cpu);
if (!err)
- pr_info(NAME "CPU%d is down.\n", cpu);
+ pr_info("CPU%d is down.\n", cpu);
else
- pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err);
+ pr_err("Error taking CPU%d down: %d\n", cpu, err);
}
out:
if (num_online_cpus() > 1)
- pr_warning(NAME "multiple CPUs still online, "
- "may miss events.\n");
+ pr_warning("multiple CPUs still online, may miss events.\n");
}
/* __ref because leave_uniprocessor calls cpu_up which is __cpuinit,
@@ -420,13 +419,13 @@ static void __ref leave_uniprocessor(void)
if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0)
return;
- pr_notice(NAME "Re-enabling CPUs...\n");
+ pr_notice("Re-enabling CPUs...\n");
for_each_cpu(cpu, downed_cpus) {
err = cpu_up(cpu);
if (!err)
- pr_info(NAME "enabled CPU%d.\n", cpu);
+ pr_info("enabled CPU%d.\n", cpu);
else
- pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err);
+ pr_err("cannot re-enable CPU%d: %d\n", cpu, err);
}
}
@@ -434,8 +433,8 @@ static void __ref leave_uniprocessor(void)
static void enter_uniprocessor(void)
{
if (num_online_cpus() > 1)
- pr_warning(NAME "multiple CPUs are online, may miss events. "
- "Suggest booting with maxcpus=1 kernel argument.\n");
+ pr_warning("multiple CPUs are online, may miss events. "
+ "Suggest booting with maxcpus=1 kernel argument.\n");
}
static void leave_uniprocessor(void)
@@ -450,13 +449,13 @@ void enable_mmiotrace(void)
goto out;
if (nommiotrace)
- pr_info(NAME "MMIO tracing disabled.\n");
+ pr_info("MMIO tracing disabled.\n");
kmmio_init();
enter_uniprocessor();
spin_lock_irq(&trace_lock);
atomic_inc(&mmiotrace_enabled);
spin_unlock_irq(&trace_lock);
- pr_info(NAME "enabled.\n");
+ pr_info("enabled.\n");
out:
mutex_unlock(&mmiotrace_mutex);
}
@@ -475,7 +474,7 @@ void disable_mmiotrace(void)
clear_trace_list(); /* guarantees: no more kmmio callbacks */
leave_uniprocessor();
kmmio_cleanup();
- pr_info(NAME "disabled.\n");
+ pr_info("disabled.\n");
out:
mutex_unlock(&mmiotrace_mutex);
}
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index d2530062fe00..b20760ca7244 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -347,8 +347,8 @@ static void init_remap_allocator(int nid)
(ulong) node_remap_end_vaddr[nid]);
}
-void __init initmem_init(unsigned long start_pfn,
- unsigned long end_pfn)
+void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
+ int acpi, int k8)
{
int nid;
long kva_target_pfn;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 459913beac71..83bbc70d11bb 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -239,8 +239,14 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
bootmap = early_node_mem(nodeid, bootmap_start, end,
bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
if (bootmap == NULL) {
- if (nodedata_phys < start || nodedata_phys >= end)
- free_bootmem(nodedata_phys, pgdat_size);
+ if (nodedata_phys < start || nodedata_phys >= end) {
+ /*
+ * only need to free it if it is from other node
+ * bootmem
+ */
+ if (nid != nodeid)
+ free_bootmem(nodedata_phys, pgdat_size);
+ }
node_data[nodeid] = NULL;
return;
}
@@ -306,8 +312,71 @@ void __init numa_init_array(void)
#ifdef CONFIG_NUMA_EMU
/* Numa emulation */
+static struct bootnode nodes[MAX_NUMNODES] __initdata;
+static struct bootnode physnodes[MAX_NUMNODES] __initdata;
static char *cmdline __initdata;
+static int __init setup_physnodes(unsigned long start, unsigned long end,
+ int acpi, int k8)
+{
+ int nr_nodes = 0;
+ int ret = 0;
+ int i;
+
+#ifdef CONFIG_ACPI_NUMA
+ if (acpi)
+ nr_nodes = acpi_get_nodes(physnodes);
+#endif
+#ifdef CONFIG_K8_NUMA
+ if (k8)
+ nr_nodes = k8_get_nodes(physnodes);
+#endif
+ /*
+ * Basic sanity checking on the physical node map: there may be errors
+ * if the SRAT or K8 incorrectly reported the topology or the mem=
+ * kernel parameter is used.
+ */
+ for (i = 0; i < nr_nodes; i++) {
+ if (physnodes[i].start == physnodes[i].end)
+ continue;
+ if (physnodes[i].start > end) {
+ physnodes[i].end = physnodes[i].start;
+ continue;
+ }
+ if (physnodes[i].end < start) {
+ physnodes[i].start = physnodes[i].end;
+ continue;
+ }
+ if (physnodes[i].start < start)
+ physnodes[i].start = start;
+ if (physnodes[i].end > end)
+ physnodes[i].end = end;
+ }
+
+ /*
+ * Remove all nodes that have no memory or were truncated because of the
+ * limited address range.
+ */
+ for (i = 0; i < nr_nodes; i++) {
+ if (physnodes[i].start == physnodes[i].end)
+ continue;
+ physnodes[ret].start = physnodes[i].start;
+ physnodes[ret].end = physnodes[i].end;
+ ret++;
+ }
+
+ /*
+ * If no physical topology was detected, a single node is faked to cover
+ * the entire address space.
+ */
+ if (!ret) {
+ physnodes[ret].start = start;
+ physnodes[ret].end = end;
+ ret = 1;
+ }
+ return ret;
+}
+
/*
* Setups up nid to range from addr to addr + size. If the end
* boundary is greater than max_addr, then max_addr is used instead.
@@ -315,11 +384,9 @@ static char *cmdline __initdata;
* allocation past addr and -1 otherwise. addr is adjusted to be at
* the end of the node.
*/
-static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr,
- u64 size, u64 max_addr)
+static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr)
{
int ret = 0;
-
nodes[nid].start = *addr;
*addr += size;
if (*addr >= max_addr) {
@@ -335,12 +402,111 @@ static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr,
}
/*
+ * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
+ * to max_addr. The return value is the number of nodes allocated.
+ */
+static int __init split_nodes_interleave(u64 addr, u64 max_addr,
+ int nr_phys_nodes, int nr_nodes)
+{
+ nodemask_t physnode_mask = NODE_MASK_NONE;
+ u64 size;
+ int big;
+ int ret = 0;
+ int i;
+
+ if (nr_nodes <= 0)
+ return -1;
+ if (nr_nodes > MAX_NUMNODES) {
+ pr_info("numa=fake=%d too large, reducing to %d\n",
+ nr_nodes, MAX_NUMNODES);
+ nr_nodes = MAX_NUMNODES;
+ }
+
+ size = (max_addr - addr - e820_hole_size(addr, max_addr)) / nr_nodes;
+ /*
+ * Calculate the number of big nodes that can be allocated as a result
+ * of consolidating the remainder.
+ */
+ big = ((size & ~FAKE_NODE_MIN_HASH_MASK) & nr_nodes) /
+ FAKE_NODE_MIN_SIZE;
+
+ size &= FAKE_NODE_MIN_HASH_MASK;
+ if (!size) {
+ pr_err("Not enough memory for each node. "
+ "NUMA emulation disabled.\n");
+ return -1;
+ }
+
+ for (i = 0; i < nr_phys_nodes; i++)
+ if (physnodes[i].start != physnodes[i].end)
+ node_set(i, physnode_mask);
+
+ /*
+ * Continue to fill physical nodes with fake nodes until there is no
+ * memory left on any of them.
+ */
+ while (nodes_weight(physnode_mask)) {
+ for_each_node_mask(i, physnode_mask) {
+ u64 end = physnodes[i].start + size;
+ u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
+
+ if (ret < big)
+ end += FAKE_NODE_MIN_SIZE;
+
+ /*
+ * Continue to add memory to this fake node if its
+ * non-reserved memory is less than the per-node size.
+ */
+ while (end - physnodes[i].start -
+ e820_hole_size(physnodes[i].start, end) < size) {
+ end += FAKE_NODE_MIN_SIZE;
+ if (end > physnodes[i].end) {
+ end = physnodes[i].end;
+ break;
+ }
+ }
+
+ /*
+ * If there won't be at least FAKE_NODE_MIN_SIZE of
+ * non-reserved memory in ZONE_DMA32 for the next node,
+ * this one must extend to the boundary.
+ */
+ if (end < dma32_end && dma32_end - end -
+ e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
+ end = dma32_end;
+
+ /*
+ * If there won't be enough non-reserved memory for the
+ * next node, this one must extend to the end of the
+ * physical node.
+ */
+ if (physnodes[i].end - end -
+ e820_hole_size(end, physnodes[i].end) < size)
+ end = physnodes[i].end;
+
+ /*
+ * Avoid allocating more nodes than requested, which can
+ * happen as a result of rounding down each node's size
+ * to FAKE_NODE_MIN_SIZE.
+ */
+ if (nodes_weight(physnode_mask) + ret >= nr_nodes)
+ end = physnodes[i].end;
+
+ if (setup_node_range(ret++, &physnodes[i].start,
+ end - physnodes[i].start,
+ physnodes[i].end) < 0)
+ node_clear(i, physnode_mask);
+ }
+ }
+ return ret;
+}
+
+/*
* Splits num_nodes nodes up equally starting at node_start. The return value
* is the number of nodes split up and addr is adjusted to be at the end of the
* last node allocated.
*/
-static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
- u64 max_addr, int node_start,
+static int __init split_nodes_equally(u64 *addr, u64 max_addr, int node_start,
int num_nodes)
{
unsigned int big;
@@ -388,7 +554,7 @@ static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
break;
}
}
- if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0)
+ if (setup_node_range(i, addr, end - *addr, max_addr) < 0)
break;
}
return i - node_start + 1;
@@ -399,12 +565,12 @@ static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
* always assigned to a final node and can be asymmetric. Returns the number of
* nodes split.
*/
-static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
- u64 max_addr, int node_start, u64 size)
+static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start,
+ u64 size)
{
int i = node_start;
size = (size << 20) & FAKE_NODE_MIN_HASH_MASK;
- while (!setup_node_range(i++, nodes, addr, size, max_addr))
+ while (!setup_node_range(i++, addr, size, max_addr))
;
return i - node_start;
}
@@ -413,15 +579,15 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
* Sets up the system RAM area from start_pfn to last_pfn according to the
* numa=fake command-line option.
*/
-static struct bootnode nodes[MAX_NUMNODES] __initdata;
-
-static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn)
+static int __init numa_emulation(unsigned long start_pfn,
+ unsigned long last_pfn, int acpi, int k8)
{
u64 size, addr = start_pfn << PAGE_SHIFT;
u64 max_addr = last_pfn << PAGE_SHIFT;
int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;
+ int num_phys_nodes;
- memset(&nodes, 0, sizeof(nodes));
+ num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8);
/*
* If the numa=fake command-line is just a single number N, split the
* system RAM into N fake nodes.
@@ -429,7 +595,8 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn
if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) {
long n = simple_strtol(cmdline, NULL, 0);
- num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0, n);
+ num_nodes = split_nodes_interleave(addr, max_addr,
+ num_phys_nodes, n);
if (num_nodes < 0)
return num_nodes;
goto out;
@@ -456,8 +623,8 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn
size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK;
if (size)
for (i = 0; i < coeff; i++, num_nodes++)
- if (setup_node_range(num_nodes, nodes,
- &addr, size, max_addr) < 0)
+ if (setup_node_range(num_nodes, &addr,
+ size, max_addr) < 0)
goto done;
if (!*cmdline)
break;
@@ -473,7 +640,7 @@ done:
if (addr < max_addr) {
if (coeff_flag && coeff < 0) {
/* Split remaining nodes into num-sized chunks */
- num_nodes += split_nodes_by_size(nodes, &addr, max_addr,
+ num_nodes += split_nodes_by_size(&addr, max_addr,
num_nodes, num);
goto out;
}
@@ -482,7 +649,7 @@ done:
/* Split remaining nodes into coeff chunks */
if (coeff <= 0)
break;
- num_nodes += split_nodes_equally(nodes, &addr, max_addr,
+ num_nodes += split_nodes_equally(&addr, max_addr,
num_nodes, coeff);
break;
case ',':
@@ -490,8 +657,8 @@ done:
break;
default:
/* Give one final node */
- setup_node_range(num_nodes, nodes, &addr,
- max_addr - addr, max_addr);
+ setup_node_range(num_nodes, &addr, max_addr - addr,
+ max_addr);
num_nodes++;
}
}
@@ -505,14 +672,10 @@ out:
}
/*
- * We need to vacate all active ranges that may have been registered by
- * SRAT and set acpi_numa to -1 so that srat_disabled() always returns
- * true. NUMA emulation has succeeded so we will not scan ACPI nodes.
+ * We need to vacate all active ranges that may have been registered for
+ * the e820 memory map.
*/
remove_all_active_ranges();
-#ifdef CONFIG_ACPI_NUMA
- acpi_numa = -1;
-#endif
for_each_node_mask(i, node_possible_map) {
e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
nodes[i].end >> PAGE_SHIFT);
@@ -524,7 +687,8 @@ out:
}
#endif /* CONFIG_NUMA_EMU */
-void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
+void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
+ int acpi, int k8)
{
int i;
@@ -532,23 +696,22 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
nodes_clear(node_online_map);
#ifdef CONFIG_NUMA_EMU
- if (cmdline && !numa_emulation(start_pfn, last_pfn))
+ if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8))
return;
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
#endif
#ifdef CONFIG_ACPI_NUMA
- if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
- last_pfn << PAGE_SHIFT))
+ if (!numa_off && acpi && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
+ last_pfn << PAGE_SHIFT))
return;
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
#endif
#ifdef CONFIG_K8_NUMA
- if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT,
- last_pfn<<PAGE_SHIFT))
+ if (!numa_off && k8 && !k8_scan_nodes())
return;
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
@@ -601,6 +764,25 @@ static __init int numa_setup(char *opt)
early_param("numa", numa_setup);
#ifdef CONFIG_NUMA
+
+static __init int find_near_online_node(int node)
+{
+ int n, val;
+ int min_val = INT_MAX;
+ int best_node = -1;
+
+ for_each_online_node(n) {
+ val = node_distance(node, n);
+
+ if (val < min_val) {
+ min_val = val;
+ best_node = n;
+ }
+ }
+
+ return best_node;
+}
+
/*
* Setup early cpu_to_node.
*
@@ -632,7 +814,7 @@ void __init init_cpu_to_node(void)
if (node == NUMA_NO_NODE)
continue;
if (!node_online(node))
- continue;
+ node = find_near_online_node(node);
numa_set_node(cpu, node);
}
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index dd38bfbefd1f..1d4eb93d333c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -279,6 +279,22 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
__pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
pgprot_val(forbidden) |= _PAGE_RW;
+#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+ /*
+ * Once the kernel maps the text as RO (kernel_set_to_readonly is set),
+ * kernel text mappings for the large page aligned text, rodata sections
+ * will be always read-only. For the kernel identity mappings covering
+ * the holes caused by this alignment can be anything that user asks.
+ *
+ * This will preserve the large page mappings for kernel text/data
+ * at no extra cost.
+ */
+ if (kernel_set_to_readonly &&
+ within(address, (unsigned long)_text,
+ (unsigned long)__end_rodata_hpage_align))
+ pgprot_val(forbidden) |= _PAGE_RW;
+#endif
+
prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
return prot;
@@ -1069,12 +1085,18 @@ EXPORT_SYMBOL(set_memory_array_wb);
int set_memory_x(unsigned long addr, int numpages)
{
+ if (!(__supported_pte_mask & _PAGE_NX))
+ return 0;
+
return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
}
EXPORT_SYMBOL(set_memory_x);
int set_memory_nx(unsigned long addr, int numpages)
{
+ if (!(__supported_pte_mask & _PAGE_NX))
+ return 0;
+
return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
}
EXPORT_SYMBOL(set_memory_nx);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index e78cd0ec2bcf..ae9648eb1c7f 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -20,6 +20,7 @@
#include <asm/cacheflush.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
+#include <asm/x86_init.h>
#include <asm/pgtable.h>
#include <asm/fcntl.h>
#include <asm/e820.h>
@@ -355,9 +356,6 @@ static int free_ram_pages_type(u64 start, u64 end)
* - _PAGE_CACHE_UC_MINUS
* - _PAGE_CACHE_UC
*
- * req_type will have a special case value '-1', when requester want to inherit
- * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
- *
* If new_type is NULL, function will return an error if it cannot reserve the
* region with req_type. If new_type is non-NULL, function will return
* available type in new_type in case of no error. In case of any error
@@ -377,9 +375,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
if (!pat_enabled) {
/* This is identical to page table setting without PAT */
if (new_type) {
- if (req_type == -1)
- *new_type = _PAGE_CACHE_WB;
- else if (req_type == _PAGE_CACHE_WC)
+ if (req_type == _PAGE_CACHE_WC)
*new_type = _PAGE_CACHE_UC_MINUS;
else
*new_type = req_type & _PAGE_CACHE_MASK;
@@ -388,7 +384,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
}
/* Low ISA region is always mapped WB in page table. No need to track */
- if (is_ISA_range(start, end - 1)) {
+ if (x86_platform.is_untracked_pat_range(start, end)) {
if (new_type)
*new_type = _PAGE_CACHE_WB;
return 0;
@@ -499,7 +495,7 @@ int free_memtype(u64 start, u64 end)
return 0;
/* Low ISA region is always mapped WB. No need to track */
- if (is_ISA_range(start, end - 1))
+ if (x86_platform.is_untracked_pat_range(start, end))
return 0;
is_range_ram = pat_pagerange_is_ram(start, end);
@@ -582,7 +578,7 @@ static unsigned long lookup_memtype(u64 paddr)
int rettype = _PAGE_CACHE_WB;
struct memtype *entry;
- if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1))
+ if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE))
return rettype;
if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
@@ -708,9 +704,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
if (!range_is_allowed(pfn, size))
return 0;
- if (file->f_flags & O_SYNC) {
+ if (file->f_flags & O_DSYNC)
flags = _PAGE_CACHE_UC_MINUS;
- }
#ifdef CONFIG_X86_32
/*
@@ -1018,8 +1013,10 @@ static const struct file_operations memtype_fops = {
static int __init pat_memtype_list_init(void)
{
- debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir,
- NULL, &memtype_fops);
+ if (pat_enabled) {
+ debugfs_create_file("pat_memtype_list", S_IRUSR,
+ arch_debugfs_dir, NULL, &memtype_fops);
+ }
return 0;
}
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index 513d8ed5d2ec..a3250aa34086 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -3,10 +3,8 @@
#include <linux/init.h>
#include <asm/pgtable.h>
+#include <asm/proto.h>
-int nx_enabled;
-
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static int disable_nx __cpuinitdata;
/*
@@ -22,48 +20,41 @@ static int __init noexec_setup(char *str)
if (!str)
return -EINVAL;
if (!strncmp(str, "on", 2)) {
- __supported_pte_mask |= _PAGE_NX;
disable_nx = 0;
} else if (!strncmp(str, "off", 3)) {
disable_nx = 1;
- __supported_pte_mask &= ~_PAGE_NX;
}
+ x86_configure_nx();
return 0;
}
early_param("noexec", noexec_setup);
-#endif
-#ifdef CONFIG_X86_PAE
-void __init set_nx(void)
+void __cpuinit x86_configure_nx(void)
{
- unsigned int v[4], l, h;
-
- if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
- cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
+ if (cpu_has_nx && !disable_nx)
+ __supported_pte_mask |= _PAGE_NX;
+ else
+ __supported_pte_mask &= ~_PAGE_NX;
+}
- if ((v[3] & (1 << 20)) && !disable_nx) {
- rdmsr(MSR_EFER, l, h);
- l |= EFER_NX;
- wrmsr(MSR_EFER, l, h);
- nx_enabled = 1;
- __supported_pte_mask |= _PAGE_NX;
+void __init x86_report_nx(void)
+{
+ if (!cpu_has_nx) {
+ printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
+ "missing in CPU or disabled in BIOS!\n");
+ } else {
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+ if (disable_nx) {
+ printk(KERN_INFO "NX (Execute Disable) protection: "
+ "disabled by kernel command line option\n");
+ } else {
+ printk(KERN_INFO "NX (Execute Disable) protection: "
+ "active\n");
}
- }
-}
#else
-void set_nx(void)
-{
-}
+ /* 32bit non-PAE kernel, NX cannot be used */
+ printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
+ "cannot be enabled: non-PAE kernel!\n");
#endif
-
-#ifdef CONFIG_X86_64
-void __cpuinit check_efer(void)
-{
- unsigned long efer;
-
- rdmsrl(MSR_EFER, efer);
- if (!(efer & EFER_NX) || disable_nx)
- __supported_pte_mask &= ~_PAGE_NX;
+ }
}
-#endif
-
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 6f8aa33031c7..9324f13492d5 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -267,6 +267,8 @@ int __init get_memcfg_from_srat(void)
e820_register_active_regions(chunk->nid, chunk->start_pfn,
min(chunk->end_pfn, max_pfn));
}
+ /* for out of order entries in SRAT */
+ sort_node_map();
for_each_online_node(nid) {
unsigned long start = node_start_pfn[nid];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 9d7ce96e5a5c..a27124185fc1 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -290,8 +290,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
start, end);
- e820_register_active_regions(node, start >> PAGE_SHIFT,
- end >> PAGE_SHIFT);
if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
update_nodes_add(node, start, end);
@@ -319,7 +317,7 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
unsigned long s = nodes[i].start >> PAGE_SHIFT;
unsigned long e = nodes[i].end >> PAGE_SHIFT;
pxmram += e - s;
- pxmram -= absent_pages_in_range(s, e);
+ pxmram -= __absent_pages_in_range(i, s, e);
if ((long)pxmram < 0)
pxmram = 0;
}
@@ -338,6 +336,19 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
void __init acpi_numa_arch_fixup(void) {}
+int __init acpi_get_nodes(struct bootnode *physnodes)
+{
+ int i;
+ int ret = 0;
+
+ for_each_node_mask(i, nodes_parsed) {
+ physnodes[ret].start = nodes[i].start;
+ physnodes[ret].end = nodes[i].end;
+ ret++;
+ }
+ return ret;
+}
+
/* Use the information discovered above to actually set up the nodes. */
int __init acpi_scan_nodes(unsigned long start, unsigned long end)
{
@@ -350,11 +361,6 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
for (i = 0; i < MAX_NUMNODES; i++)
cutoff_node(i, start, end);
- if (!nodes_cover_memory(nodes)) {
- bad_srat();
- return -1;
- }
-
memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
memblk_nodeid);
if (memnode_shift < 0) {
@@ -364,6 +370,16 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
return -1;
}
+ for_each_node_mask(i, nodes_parsed)
+ e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
+ nodes[i].end >> PAGE_SHIFT);
+ /* for out of order entries in SRAT */
+ sort_node_map();
+ if (!nodes_cover_memory(nodes)) {
+ bad_srat();
+ return -1;
+ }
+
/* Account for nodes with cpus and no memory */
nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);
@@ -454,7 +470,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
for (i = 0; i < num_nodes; i++)
if (fake_nodes[i].start != fake_nodes[i].end)
node_set(i, nodes_parsed);
- WARN_ON(!nodes_cover_memory(fake_nodes));
}
static int null_slit_node_compare(int a, int b)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 36fe08eeb5c3..65b58e4b0b8b 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -8,6 +8,7 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
+#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
@@ -43,7 +44,7 @@ union smp_flush_state {
spinlock_t tlbstate_lock;
DECLARE_BITMAP(flush_cpumask, NR_CPUS);
};
- char pad[CONFIG_X86_INTERNODE_CACHE_BYTES];
+ char pad[INTERNODE_CACHE_BYTES];
} ____cacheline_internodealigned_in_smp;
/* State is put into the per CPU data section, but padded
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 044897be021f..3855096c59b8 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -41,10 +41,11 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
}
static struct stacktrace_ops backtrace_ops = {
- .warning = backtrace_warning,
- .warning_symbol = backtrace_warning_symbol,
- .stack = backtrace_stack,
- .address = backtrace_address,
+ .warning = backtrace_warning,
+ .warning_symbol = backtrace_warning_symbol,
+ .stack = backtrace_stack,
+ .address = backtrace_address,
+ .walk_stack = print_context_stack,
};
struct frame_head {
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index d49202e740ea..564b008a51c7 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -15,3 +15,8 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
obj-y += common.o early.o
obj-y += amd_bus.o
+obj-$(CONFIG_X86_64) += bus_numa.o intel_bus.o
+
+ifeq ($(CONFIG_PCI_DEBUG),y)
+EXTRA_CFLAGS += -DDEBUG
+endif
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 1014eb4bfc37..959e548a7039 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -7,6 +7,7 @@
#include <asm/pci_x86.h>
struct pci_root_info {
+ struct acpi_device *bridge;
char *name;
unsigned int res_num;
struct resource *res;
@@ -58,6 +59,30 @@ bus_has_transparent_bridge(struct pci_bus *bus)
return false;
}
+static void
+align_resource(struct acpi_device *bridge, struct resource *res)
+{
+ int align = (res->flags & IORESOURCE_MEM) ? 16 : 4;
+
+ /*
+ * Host bridge windows are not BARs, but the decoders on the PCI side
+ * that claim this address space have starting alignment and length
+ * constraints, so fix any obvious BIOS goofs.
+ */
+ if (!IS_ALIGNED(res->start, align)) {
+ dev_printk(KERN_DEBUG, &bridge->dev,
+ "host bridge window %pR invalid; "
+ "aligning start to %d-byte boundary\n", res, align);
+ res->start &= ~(align - 1);
+ }
+ if (!IS_ALIGNED(res->end + 1, align)) {
+ dev_printk(KERN_DEBUG, &bridge->dev,
+ "host bridge window %pR invalid; "
+ "aligning end to %d-byte boundary\n", res, align);
+ res->end = ALIGN(res->end, align) - 1;
+ }
+}
+
static acpi_status
setup_resource(struct acpi_resource *acpi_res, void *data)
{
@@ -91,11 +116,12 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
start = addr.minimum + addr.translation_offset;
end = start + addr.address_length - 1;
if (info->res_num >= max_root_bus_resources) {
- printk(KERN_WARNING "PCI: Failed to allocate 0x%lx-0x%lx "
- "from %s for %s due to _CRS returning more than "
- "%d resource descriptors\n", (unsigned long) start,
- (unsigned long) end, root->name, info->name,
- max_root_bus_resources);
+ if (pci_probe & PCI_USE__CRS)
+ printk(KERN_WARNING "PCI: Failed to allocate "
+ "0x%lx-0x%lx from %s for %s due to _CRS "
+ "returning more than %d resource descriptors\n",
+ (unsigned long) start, (unsigned long) end,
+ root->name, info->name, max_root_bus_resources);
return AE_OK;
}
@@ -105,14 +131,28 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
res->start = start;
res->end = end;
res->child = NULL;
+ align_resource(info->bridge, res);
+
+ if (!(pci_probe & PCI_USE__CRS)) {
+ dev_printk(KERN_DEBUG, &info->bridge->dev,
+ "host bridge window %pR (ignored)\n", res);
+ return AE_OK;
+ }
if (insert_resource(root, res)) {
- printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx "
- "from %s for %s\n", (unsigned long) res->start,
- (unsigned long) res->end, root->name, info->name);
+ dev_err(&info->bridge->dev,
+ "can't allocate host bridge window %pR\n", res);
} else {
info->bus->resource[info->res_num] = res;
info->res_num++;
+ if (addr.translation_offset)
+ dev_info(&info->bridge->dev, "host bridge window %pR "
+ "(PCI address [%#llx-%#llx])\n",
+ res, res->start - addr.translation_offset,
+ res->end - addr.translation_offset);
+ else
+ dev_info(&info->bridge->dev,
+ "host bridge window %pR\n", res);
}
return AE_OK;
}
@@ -124,6 +164,12 @@ get_current_resources(struct acpi_device *device, int busnum,
struct pci_root_info info;
size_t size;
+ if (!(pci_probe & PCI_USE__CRS))
+ dev_info(&device->dev,
+ "ignoring host bridge windows from ACPI; "
+ "boot with \"pci=use_crs\" to use them\n");
+
+ info.bridge = device;
info.bus = bus;
info.res_num = 0;
acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource,
@@ -163,8 +209,9 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
#endif
if (domain && !pci_domains_supported) {
- printk(KERN_WARNING "PCI: Multiple domains not supported "
- "(dom %d, bus %d)\n", domain, busnum);
+ printk(KERN_WARNING "pci_bus %04x:%02x: "
+ "ignored (multiple domains not supported)\n",
+ domain, busnum);
return NULL;
}
@@ -188,7 +235,8 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
*/
sd = kzalloc(sizeof(*sd), GFP_KERNEL);
if (!sd) {
- printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum);
+ printk(KERN_WARNING "pci_bus %04x:%02x: "
+ "ignored (out of memory)\n", domain, busnum);
return NULL;
}
@@ -209,9 +257,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
} else {
bus = pci_create_bus(NULL, busnum, &pci_root_ops, sd);
if (bus) {
- if (pci_probe & PCI_USE__CRS)
- get_current_resources(device, busnum, domain,
- bus);
+ get_current_resources(device, busnum, domain, bus);
bus->subordinate = pci_scan_child_bus(bus);
}
}
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 572ee9782f2a..95ecbd495955 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -6,10 +6,10 @@
#ifdef CONFIG_X86_64
#include <asm/pci-direct.h>
-#include <asm/mpspec.h>
-#include <linux/cpumask.h>
#endif
+#include "bus_numa.h"
+
/*
* This discovers the pcibus <-> node mapping on AMD K8.
* also get peer root bus resource for io,mmio
@@ -17,67 +17,6 @@
#ifdef CONFIG_X86_64
-/*
- * sub bus (transparent) will use entres from 3 to store extra from root,
- * so need to make sure have enought slot there, increase PCI_BUS_NUM_RESOURCES?
- */
-#define RES_NUM 16
-struct pci_root_info {
- char name[12];
- unsigned int res_num;
- struct resource res[RES_NUM];
- int bus_min;
- int bus_max;
- int node;
- int link;
-};
-
-/* 4 at this time, it may become to 32 */
-#define PCI_ROOT_NR 4
-static int pci_root_num;
-static struct pci_root_info pci_root_info[PCI_ROOT_NR];
-
-void x86_pci_root_bus_res_quirks(struct pci_bus *b)
-{
- int i;
- int j;
- struct pci_root_info *info;
-
- /* don't go for it if _CRS is used already */
- if (b->resource[0] != &ioport_resource ||
- b->resource[1] != &iomem_resource)
- return;
-
- /* if only one root bus, don't need to anything */
- if (pci_root_num < 2)
- return;
-
- for (i = 0; i < pci_root_num; i++) {
- if (pci_root_info[i].bus_min == b->number)
- break;
- }
-
- if (i == pci_root_num)
- return;
-
- printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n",
- b->number);
-
- info = &pci_root_info[i];
- for (j = 0; j < info->res_num; j++) {
- struct resource *res;
- struct resource *root;
-
- res = &info->res[j];
- b->resource[j] = res;
- if (res->flags & IORESOURCE_IO)
- root = &ioport_resource;
- else
- root = &iomem_resource;
- insert_resource(root, res);
- }
-}
-
#define RANGE_NUM 16
struct res_range {
@@ -130,52 +69,6 @@ static void __init update_range(struct res_range *range, size_t start,
}
}
-static void __init update_res(struct pci_root_info *info, size_t start,
- size_t end, unsigned long flags, int merge)
-{
- int i;
- struct resource *res;
-
- if (!merge)
- goto addit;
-
- /* try to merge it with old one */
- for (i = 0; i < info->res_num; i++) {
- size_t final_start, final_end;
- size_t common_start, common_end;
-
- res = &info->res[i];
- if (res->flags != flags)
- continue;
-
- common_start = max((size_t)res->start, start);
- common_end = min((size_t)res->end, end);
- if (common_start > common_end + 1)
- continue;
-
- final_start = min((size_t)res->start, start);
- final_end = max((size_t)res->end, end);
-
- res->start = final_start;
- res->end = final_end;
- return;
- }
-
-addit:
-
- /* need to add that */
- if (info->res_num >= RES_NUM)
- return;
-
- res = &info->res[info->res_num];
- res->name = info->name;
- res->flags = flags;
- res->start = start;
- res->end = end;
- res->child = NULL;
- info->res_num++;
-}
-
struct pci_hostbridge_probe {
u32 bus;
u32 slot;
@@ -230,7 +123,6 @@ static int __init early_fill_mp_bus_info(void)
int j;
unsigned bus;
unsigned slot;
- int found;
int node;
int link;
int def_node;
@@ -247,7 +139,7 @@ static int __init early_fill_mp_bus_info(void)
if (!early_pci_allowed())
return -1;
- found = 0;
+ found_all_numa_early = 0;
for (i = 0; i < ARRAY_SIZE(pci_probes); i++) {
u32 id;
u16 device;
@@ -261,12 +153,12 @@ static int __init early_fill_mp_bus_info(void)
device = (id>>16) & 0xffff;
if (pci_probes[i].vendor == vendor &&
pci_probes[i].device == device) {
- found = 1;
+ found_all_numa_early = 1;
break;
}
}
- if (!found)
+ if (!found_all_numa_early)
return 0;
pci_root_num = 0;
@@ -488,7 +380,7 @@ static int __init early_fill_mp_bus_info(void)
info = &pci_root_info[i];
res_num = info->res_num;
busnum = info->bus_min;
- printk(KERN_DEBUG "bus: [%02x,%02x] on node %x link %x\n",
+ printk(KERN_DEBUG "bus: [%02x, %02x] on node %x link %x\n",
info->bus_min, info->bus_max, info->node, info->link);
for (j = 0; j < res_num; j++) {
res = &info->res[j];
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
new file mode 100644
index 000000000000..145df00e0387
--- /dev/null
+++ b/arch/x86/pci/bus_numa.c
@@ -0,0 +1,101 @@
+#include <linux/init.h>
+#include <linux/pci.h>
+
+#include "bus_numa.h"
+
+int pci_root_num;
+struct pci_root_info pci_root_info[PCI_ROOT_NR];
+int found_all_numa_early;
+
+void x86_pci_root_bus_res_quirks(struct pci_bus *b)
+{
+ int i;
+ int j;
+ struct pci_root_info *info;
+
+ /* don't go for it if _CRS is used already */
+ if (b->resource[0] != &ioport_resource ||
+ b->resource[1] != &iomem_resource)
+ return;
+
+ if (!pci_root_num)
+ return;
+
+ /* for amd, if only one root bus, don't need to do anything */
+ if (pci_root_num < 2 && found_all_numa_early)
+ return;
+
+ for (i = 0; i < pci_root_num; i++) {
+ if (pci_root_info[i].bus_min == b->number)
+ break;
+ }
+
+ if (i == pci_root_num)
+ return;
+
+ printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n",
+ b->number);
+
+ info = &pci_root_info[i];
+ for (j = 0; j < info->res_num; j++) {
+ struct resource *res;
+ struct resource *root;
+
+ res = &info->res[j];
+ b->resource[j] = res;
+ if (res->flags & IORESOURCE_IO)
+ root = &ioport_resource;
+ else
+ root = &iomem_resource;
+ insert_resource(root, res);
+ }
+}
+
+void __init update_res(struct pci_root_info *info, size_t start,
+ size_t end, unsigned long flags, int merge)
+{
+ int i;
+ struct resource *res;
+
+ if (start > end)
+ return;
+
+ if (!merge)
+ goto addit;
+
+ /* try to merge it with old one */
+ for (i = 0; i < info->res_num; i++) {
+ size_t final_start, final_end;
+ size_t common_start, common_end;
+
+ res = &info->res[i];
+ if (res->flags != flags)
+ continue;
+
+ common_start = max((size_t)res->start, start);
+ common_end = min((size_t)res->end, end);
+ if (common_start > common_end + 1)
+ continue;
+
+ final_start = min((size_t)res->start, start);
+ final_end = max((size_t)res->end, end);
+
+ res->start = final_start;
+ res->end = final_end;
+ return;
+ }
+
+addit:
+
+ /* need to add that */
+ if (info->res_num >= RES_NUM)
+ return;
+
+ res = &info->res[info->res_num];
+ res->name = info->name;
+ res->flags = flags;
+ res->start = start;
+ res->end = end;
+ res->child = NULL;
+ info->res_num++;
+}
diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h
new file mode 100644
index 000000000000..adbc23fe82ac
--- /dev/null
+++ b/arch/x86/pci/bus_numa.h
@@ -0,0 +1,27 @@
+#ifdef CONFIG_X86_64
+
+/*
+ * sub bus (transparent) will use entres from 3 to store extra from
+ * root, so need to make sure we have enough slot there, Should we
+ * increase PCI_BUS_NUM_RESOURCES?
+ */
+#define RES_NUM 16
+struct pci_root_info {
+ char name[12];
+ unsigned int res_num;
+ struct resource res[RES_NUM];
+ int bus_min;
+ int bus_max;
+ int node;
+ int link;
+};
+
+/* 4 at this time, it may become to 32 */
+#define PCI_ROOT_NR 4
+extern int pci_root_num;
+extern struct pci_root_info pci_root_info[PCI_ROOT_NR];
+extern int found_all_numa_early;
+
+extern void update_res(struct pci_root_info *info, size_t start,
+ size_t end, unsigned long flags, int merge);
+#endif
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 1331fcf26143..d2552c68e94d 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -410,8 +410,6 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
return bus;
}
-extern u8 pci_cache_line_size;
-
int __init pcibios_init(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -422,15 +420,19 @@ int __init pcibios_init(void)
}
/*
- * Assume PCI cacheline size of 32 bytes for all x86s except K7/K8
- * and P4. It's also good for 386/486s (which actually have 16)
+ * Set PCI cacheline size to that of the CPU if the CPU has reported it.
+ * (For older CPUs that don't support cpuid, we se it to 32 bytes
+ * It's also good for 386/486s (which actually have 16)
* as quite a few PCI devices do not support smaller values.
*/
- pci_cache_line_size = 32 >> 2;
- if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD)
- pci_cache_line_size = 64 >> 2; /* K7 & K8 */
- else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL)
- pci_cache_line_size = 128 >> 2; /* P4 */
+ if (c->x86_clflush_size > 0) {
+ pci_dfl_cache_line_size = c->x86_clflush_size >> 2;
+ printk(KERN_DEBUG "PCI: pci_cache_line_size set to %d bytes\n",
+ pci_dfl_cache_line_size << 2);
+ } else {
+ pci_dfl_cache_line_size = 32 >> 2;
+ printk(KERN_DEBUG "PCI: Unknown cacheline size. Setting to 32 bytes\n");
+ }
pcibios_resource_survey();
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index aaf26ae58cd5..d1067d539bee 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -12,8 +12,6 @@ u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset)
u32 v;
outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
v = inl(0xcfc);
- if (v != 0xffffffff)
- pr_debug("%x reading 4 from %x: %x\n", slot, offset, v);
return v;
}
@@ -22,7 +20,6 @@ u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset)
u8 v;
outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
v = inb(0xcfc + (offset&3));
- pr_debug("%x reading 1 from %x: %x\n", slot, offset, v);
return v;
}
@@ -31,28 +28,24 @@ u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset)
u16 v;
outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
v = inw(0xcfc + (offset&2));
- pr_debug("%x reading 2 from %x: %x\n", slot, offset, v);
return v;
}
void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset,
u32 val)
{
- pr_debug("%x writing to %x: %x\n", slot, offset, val);
outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
outl(val, 0xcfc);
}
void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val)
{
- pr_debug("%x writing to %x: %x\n", slot, offset, val);
outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
outb(val, 0xcfc + (offset&3));
}
void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val)
{
- pr_debug("%x writing to %x: %x\n", slot, offset, val);
outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
outw(val, 0xcfc + (offset&2));
}
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index b22d13b0c71d..5dc9e8c63fcd 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -129,7 +129,9 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
continue;
if (!r->start ||
pci_claim_resource(dev, idx) < 0) {
- dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx);
+ dev_info(&dev->dev,
+ "can't reserve window %pR\n",
+ r);
/*
* Something is wrong with the region.
* Invalidate the resource to prevent
@@ -144,16 +146,29 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
}
}
+struct pci_check_idx_range {
+ int start;
+ int end;
+};
+
static void __init pcibios_allocate_resources(int pass)
{
struct pci_dev *dev = NULL;
- int idx, disabled;
+ int idx, disabled, i;
u16 command;
struct resource *r;
+ struct pci_check_idx_range idx_range[] = {
+ { PCI_STD_RESOURCES, PCI_STD_RESOURCE_END },
+#ifdef CONFIG_PCI_IOV
+ { PCI_IOV_RESOURCES, PCI_IOV_RESOURCE_END },
+#endif
+ };
+
for_each_pci_dev(dev) {
pci_read_config_word(dev, PCI_COMMAND, &command);
- for (idx = 0; idx < PCI_ROM_RESOURCE; idx++) {
+ for (i = 0; i < ARRAY_SIZE(idx_range); i++)
+ for (idx = idx_range[i].start; idx <= idx_range[i].end; idx++) {
r = &dev->resource[idx];
if (r->parent) /* Already allocated */
continue;
@@ -164,12 +179,12 @@ static void __init pcibios_allocate_resources(int pass)
else
disabled = !(command & PCI_COMMAND_MEMORY);
if (pass == disabled) {
- dev_dbg(&dev->dev, "resource %#08llx-%#08llx (f=%lx, d=%d, p=%d)\n",
- (unsigned long long) r->start,
- (unsigned long long) r->end,
- r->flags, disabled, pass);
+ dev_dbg(&dev->dev,
+ "BAR %d: reserving %pr (d=%d, p=%d)\n",
+ idx, r, disabled, pass);
if (pci_claim_resource(dev, idx) < 0) {
- dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx);
+ dev_info(&dev->dev,
+ "can't reserve %pR\n", r);
/* We'll assign a new address later */
r->end -= r->start;
r->start = 0;
@@ -182,7 +197,7 @@ static void __init pcibios_allocate_resources(int pass)
/* Turn the ROM off, leave the resource region,
* but keep it unregistered. */
u32 reg;
- dev_dbg(&dev->dev, "disabling ROM\n");
+ dev_dbg(&dev->dev, "disabling ROM %pR\n", r);
r->flags &= ~IORESOURCE_ROM_ENABLE;
pci_read_config_dword(dev,
dev->rom_base_reg, &reg);
@@ -282,6 +297,15 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
return -EINVAL;
prot = pgprot_val(vma->vm_page_prot);
+
+ /*
+ * Return error if pat is not enabled and write_combine is requested.
+ * Caller can followup with UC MINUS request and add a WC mtrr if there
+ * is a free mtrr slot.
+ */
+ if (!pat_enabled && write_combine)
+ return -EINVAL;
+
if (pat_enabled && write_combine)
prot |= _PAGE_CACHE_WC;
else if (pat_enabled || boot_cpu_data.x86 > 3)
diff --git a/arch/x86/pci/intel_bus.c b/arch/x86/pci/intel_bus.c
new file mode 100644
index 000000000000..b7a55dc55d13
--- /dev/null
+++ b/arch/x86/pci/intel_bus.c
@@ -0,0 +1,90 @@
+/*
+ * to read io range from IOH pci conf, need to do it after mmconfig is there
+ */
+
+#include <linux/delay.h>
+#include <linux/dmi.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <asm/pci_x86.h>
+
+#include "bus_numa.h"
+
+static inline void print_ioh_resources(struct pci_root_info *info)
+{
+ int res_num;
+ int busnum;
+ int i;
+
+ printk(KERN_DEBUG "IOH bus: [%02x, %02x]\n",
+ info->bus_min, info->bus_max);
+ res_num = info->res_num;
+ busnum = info->bus_min;
+ for (i = 0; i < res_num; i++) {
+ struct resource *res;
+
+ res = &info->res[i];
+ printk(KERN_DEBUG "IOH bus: %02x index %x %s: [%llx, %llx]\n",
+ busnum, i,
+ (res->flags & IORESOURCE_IO) ? "io port" :
+ "mmio",
+ res->start, res->end);
+ }
+}
+
+#define IOH_LIO 0x108
+#define IOH_LMMIOL 0x10c
+#define IOH_LMMIOH 0x110
+#define IOH_LMMIOH_BASEU 0x114
+#define IOH_LMMIOH_LIMITU 0x118
+#define IOH_LCFGBUS 0x11c
+
+static void __devinit pci_root_bus_res(struct pci_dev *dev)
+{
+ u16 word;
+ u32 dword;
+ struct pci_root_info *info;
+ u16 io_base, io_end;
+ u32 mmiol_base, mmiol_end;
+ u64 mmioh_base, mmioh_end;
+ int bus_base, bus_end;
+
+ if (pci_root_num >= PCI_ROOT_NR) {
+ printk(KERN_DEBUG "intel_bus.c: PCI_ROOT_NR is too small\n");
+ return;
+ }
+
+ info = &pci_root_info[pci_root_num];
+ pci_root_num++;
+
+ pci_read_config_word(dev, IOH_LCFGBUS, &word);
+ bus_base = (word & 0xff);
+ bus_end = (word & 0xff00) >> 8;
+ sprintf(info->name, "PCI Bus #%02x", bus_base);
+ info->bus_min = bus_base;
+ info->bus_max = bus_end;
+
+ pci_read_config_word(dev, IOH_LIO, &word);
+ io_base = (word & 0xf0) << (12 - 4);
+ io_end = (word & 0xf000) | 0xfff;
+ update_res(info, io_base, io_end, IORESOURCE_IO, 0);
+
+ pci_read_config_dword(dev, IOH_LMMIOL, &dword);
+ mmiol_base = (dword & 0xff00) << (24 - 8);
+ mmiol_end = (dword & 0xff000000) | 0xffffff;
+ update_res(info, mmiol_base, mmiol_end, IORESOURCE_MEM, 0);
+
+ pci_read_config_dword(dev, IOH_LMMIOH, &dword);
+ mmioh_base = ((u64)(dword & 0xfc00)) << (26 - 10);
+ mmioh_end = ((u64)(dword & 0xfc000000) | 0x3ffffff);
+ pci_read_config_dword(dev, IOH_LMMIOH_BASEU, &dword);
+ mmioh_base |= ((u64)(dword & 0x7ffff)) << 32;
+ pci_read_config_dword(dev, IOH_LMMIOH_LIMITU, &dword);
+ mmioh_end |= ((u64)(dword & 0x7ffff)) << 32;
+ update_res(info, mmioh_base, mmioh_end, IORESOURCE_MEM, 0);
+
+ print_ioh_resources(info);
+}
+
+/* intel IOH */
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, pci_root_bus_res);
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 602c172d3bd5..b19d1e54201e 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -15,48 +15,98 @@
#include <linux/acpi.h>
#include <linux/sfi_acpi.h>
#include <linux/bitmap.h>
-#include <linux/sort.h>
+#include <linux/dmi.h>
#include <asm/e820.h>
#include <asm/pci_x86.h>
#include <asm/acpi.h>
#define PREFIX "PCI: "
-/* aperture is up to 256MB but BIOS may reserve less */
-#define MMCONFIG_APER_MIN (2 * 1024*1024)
-#define MMCONFIG_APER_MAX (256 * 1024*1024)
-
/* Indicate if the mmcfg resources have been placed into the resource table. */
static int __initdata pci_mmcfg_resources_inserted;
-static __init int extend_mmcfg(int num)
+LIST_HEAD(pci_mmcfg_list);
+
+static __init void pci_mmconfig_remove(struct pci_mmcfg_region *cfg)
{
- struct acpi_mcfg_allocation *new;
- int new_num = pci_mmcfg_config_num + num;
+ if (cfg->res.parent)
+ release_resource(&cfg->res);
+ list_del(&cfg->list);
+ kfree(cfg);
+}
- new = kzalloc(sizeof(pci_mmcfg_config[0]) * new_num, GFP_KERNEL);
- if (!new)
- return -1;
+static __init void free_all_mmcfg(void)
+{
+ struct pci_mmcfg_region *cfg, *tmp;
- if (pci_mmcfg_config) {
- memcpy(new, pci_mmcfg_config,
- sizeof(pci_mmcfg_config[0]) * new_num);
- kfree(pci_mmcfg_config);
+ pci_mmcfg_arch_free();
+ list_for_each_entry_safe(cfg, tmp, &pci_mmcfg_list, list)
+ pci_mmconfig_remove(cfg);
+}
+
+static __init void list_add_sorted(struct pci_mmcfg_region *new)
+{
+ struct pci_mmcfg_region *cfg;
+
+ /* keep list sorted by segment and starting bus number */
+ list_for_each_entry(cfg, &pci_mmcfg_list, list) {
+ if (cfg->segment > new->segment ||
+ (cfg->segment == new->segment &&
+ cfg->start_bus >= new->start_bus)) {
+ list_add_tail(&new->list, &cfg->list);
+ return;
+ }
}
- pci_mmcfg_config = new;
+ list_add_tail(&new->list, &pci_mmcfg_list);
+}
- return 0;
+static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start,
+ int end, u64 addr)
+{
+ struct pci_mmcfg_region *new;
+ int num_buses;
+ struct resource *res;
+
+ if (addr == 0)
+ return NULL;
+
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ new->address = addr;
+ new->segment = segment;
+ new->start_bus = start;
+ new->end_bus = end;
+
+ list_add_sorted(new);
+
+ num_buses = end - start + 1;
+ res = &new->res;
+ res->start = addr + PCI_MMCFG_BUS_OFFSET(start);
+ res->end = addr + PCI_MMCFG_BUS_OFFSET(num_buses) - 1;
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ snprintf(new->name, PCI_MMCFG_RESOURCE_NAME_LEN,
+ "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end);
+ res->name = new->name;
+
+ printk(KERN_INFO PREFIX "MMCONFIG for domain %04x [bus %02x-%02x] at "
+ "%pR (base %#lx)\n", segment, start, end, &new->res,
+ (unsigned long) addr);
+
+ return new;
}
-static __init void fill_one_mmcfg(u64 addr, int segment, int start, int end)
+struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus)
{
- int i = pci_mmcfg_config_num;
+ struct pci_mmcfg_region *cfg;
- pci_mmcfg_config_num++;
- pci_mmcfg_config[i].address = addr;
- pci_mmcfg_config[i].pci_segment = segment;
- pci_mmcfg_config[i].start_bus_number = start;
- pci_mmcfg_config[i].end_bus_number = end;
+ list_for_each_entry(cfg, &pci_mmcfg_list, list)
+ if (cfg->segment == segment &&
+ cfg->start_bus <= bus && bus <= cfg->end_bus)
+ return cfg;
+
+ return NULL;
}
static const char __init *pci_mmcfg_e7520(void)
@@ -68,11 +118,9 @@ static const char __init *pci_mmcfg_e7520(void)
if (win == 0x0000 || win == 0xf000)
return NULL;
- if (extend_mmcfg(1) == -1)
+ if (pci_mmconfig_add(0, 0, 255, win << 16) == NULL)
return NULL;
- fill_one_mmcfg(win << 16, 0, 0, 255);
-
return "Intel Corporation E7520 Memory Controller Hub";
}
@@ -114,11 +162,9 @@ static const char __init *pci_mmcfg_intel_945(void)
if ((pciexbar & mask) >= 0xf0000000U)
return NULL;
- if (extend_mmcfg(1) == -1)
+ if (pci_mmconfig_add(0, 0, (len >> 20) - 1, pciexbar & mask) == NULL)
return NULL;
- fill_one_mmcfg(pciexbar & mask, 0, 0, (len >> 20) - 1);
-
return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub";
}
@@ -127,7 +173,7 @@ static const char __init *pci_mmcfg_amd_fam10h(void)
u32 low, high, address;
u64 base, msr;
int i;
- unsigned segnbits = 0, busnbits;
+ unsigned segnbits = 0, busnbits, end_bus;
if (!(pci_probe & PCI_CHECK_ENABLE_AMD_MMCONF))
return NULL;
@@ -161,11 +207,13 @@ static const char __init *pci_mmcfg_amd_fam10h(void)
busnbits = 8;
}
- if (extend_mmcfg(1 << segnbits) == -1)
- return NULL;
-
+ end_bus = (1 << busnbits) - 1;
for (i = 0; i < (1 << segnbits); i++)
- fill_one_mmcfg(base + (1<<28) * i, i, 0, (1 << busnbits) - 1);
+ if (pci_mmconfig_add(i, 0, end_bus,
+ base + (1<<28) * i) == NULL) {
+ free_all_mmcfg();
+ return NULL;
+ }
return "AMD Family 10h NB";
}
@@ -190,7 +238,7 @@ static const char __init *pci_mmcfg_nvidia_mcp55(void)
/*
* do check if amd fam10h already took over
*/
- if (!acpi_disabled || pci_mmcfg_config_num || mcp55_checked)
+ if (!acpi_disabled || !list_empty(&pci_mmcfg_list) || mcp55_checked)
return NULL;
mcp55_checked = true;
@@ -213,16 +261,14 @@ static const char __init *pci_mmcfg_nvidia_mcp55(void)
if (!(extcfg & extcfg_enable_mask))
continue;
- if (extend_mmcfg(1) == -1)
- continue;
-
size_index = (extcfg & extcfg_size_mask) >> extcfg_size_shift;
base = extcfg & extcfg_base_mask[size_index];
/* base could > 4G */
base <<= extcfg_base_lshift;
start = (extcfg & extcfg_start_mask) >> extcfg_start_shift;
end = start + extcfg_sizebus[size_index] - 1;
- fill_one_mmcfg(base, 0, start, end);
+ if (pci_mmconfig_add(0, start, end, base) == NULL)
+ continue;
mcp55_mmconf_found++;
}
@@ -253,45 +299,27 @@ static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = {
0x0369, pci_mmcfg_nvidia_mcp55 },
};
-static int __init cmp_mmcfg(const void *x1, const void *x2)
-{
- const typeof(pci_mmcfg_config[0]) *m1 = x1;
- const typeof(pci_mmcfg_config[0]) *m2 = x2;
- int start1, start2;
-
- start1 = m1->start_bus_number;
- start2 = m2->start_bus_number;
-
- return start1 - start2;
-}
-
static void __init pci_mmcfg_check_end_bus_number(void)
{
- int i;
- typeof(pci_mmcfg_config[0]) *cfg, *cfgx;
-
- /* sort them at first */
- sort(pci_mmcfg_config, pci_mmcfg_config_num,
- sizeof(pci_mmcfg_config[0]), cmp_mmcfg, NULL);
+ struct pci_mmcfg_region *cfg, *cfgx;
/* last one*/
- if (pci_mmcfg_config_num > 0) {
- i = pci_mmcfg_config_num - 1;
- cfg = &pci_mmcfg_config[i];
- if (cfg->end_bus_number < cfg->start_bus_number)
- cfg->end_bus_number = 255;
- }
+ cfg = list_entry(pci_mmcfg_list.prev, typeof(*cfg), list);
+ if (cfg)
+ if (cfg->end_bus < cfg->start_bus)
+ cfg->end_bus = 255;
- /* don't overlap please */
- for (i = 0; i < pci_mmcfg_config_num - 1; i++) {
- cfg = &pci_mmcfg_config[i];
- cfgx = &pci_mmcfg_config[i+1];
+ if (list_is_singular(&pci_mmcfg_list))
+ return;
- if (cfg->end_bus_number < cfg->start_bus_number)
- cfg->end_bus_number = 255;
+ /* don't overlap please */
+ list_for_each_entry(cfg, &pci_mmcfg_list, list) {
+ if (cfg->end_bus < cfg->start_bus)
+ cfg->end_bus = 255;
- if (cfg->end_bus_number >= cfgx->start_bus_number)
- cfg->end_bus_number = cfgx->start_bus_number - 1;
+ cfgx = list_entry(cfg->list.next, typeof(*cfg), list);
+ if (cfg != cfgx && cfg->end_bus >= cfgx->start_bus)
+ cfg->end_bus = cfgx->start_bus - 1;
}
}
@@ -306,8 +334,7 @@ static int __init pci_mmcfg_check_hostbridge(void)
if (!raw_pci_ops)
return 0;
- pci_mmcfg_config_num = 0;
- pci_mmcfg_config = NULL;
+ free_all_mmcfg();
for (i = 0; i < ARRAY_SIZE(pci_mmcfg_probes); i++) {
bus = pci_mmcfg_probes[i].bus;
@@ -322,45 +349,22 @@ static int __init pci_mmcfg_check_hostbridge(void)
name = pci_mmcfg_probes[i].probe();
if (name)
- printk(KERN_INFO "PCI: Found %s with MMCONFIG support.\n",
+ printk(KERN_INFO PREFIX "%s with MMCONFIG support\n",
name);
}
/* some end_bus_number is crazy, fix it */
pci_mmcfg_check_end_bus_number();
- return pci_mmcfg_config_num != 0;
+ return !list_empty(&pci_mmcfg_list);
}
static void __init pci_mmcfg_insert_resources(void)
{
-#define PCI_MMCFG_RESOURCE_NAME_LEN 24
- int i;
- struct resource *res;
- char *names;
- unsigned num_buses;
-
- res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res),
- pci_mmcfg_config_num, GFP_KERNEL);
- if (!res) {
- printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n");
- return;
- }
+ struct pci_mmcfg_region *cfg;
- names = (void *)&res[pci_mmcfg_config_num];
- for (i = 0; i < pci_mmcfg_config_num; i++, res++) {
- struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i];
- num_buses = cfg->end_bus_number - cfg->start_bus_number + 1;
- res->name = names;
- snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN,
- "PCI MMCONFIG %u [%02x-%02x]", cfg->pci_segment,
- cfg->start_bus_number, cfg->end_bus_number);
- res->start = cfg->address + (cfg->start_bus_number << 20);
- res->end = res->start + (num_buses << 20) - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- insert_resource(&iomem_resource, res);
- names += PCI_MMCFG_RESOURCE_NAME_LEN;
- }
+ list_for_each_entry(cfg, &pci_mmcfg_list, list)
+ insert_resource(&iomem_resource, &cfg->res);
/* Mark that the resources have been inserted. */
pci_mmcfg_resources_inserted = 1;
@@ -437,11 +441,12 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used)
typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type);
static int __init is_mmconf_reserved(check_reserved_t is_reserved,
- u64 addr, u64 size, int i,
- typeof(pci_mmcfg_config[0]) *cfg, int with_e820)
+ struct pci_mmcfg_region *cfg, int with_e820)
{
+ u64 addr = cfg->res.start;
+ u64 size = resource_size(&cfg->res);
u64 old_size = size;
- int valid = 0;
+ int valid = 0, num_buses;
while (!is_reserved(addr, addr + size, E820_RESERVED)) {
size >>= 1;
@@ -450,19 +455,25 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved,
}
if (size >= (16UL<<20) || size == old_size) {
- printk(KERN_NOTICE
- "PCI: MCFG area at %Lx reserved in %s\n",
- addr, with_e820?"E820":"ACPI motherboard resources");
+ printk(KERN_INFO PREFIX "MMCONFIG at %pR reserved in %s\n",
+ &cfg->res,
+ with_e820 ? "E820" : "ACPI motherboard resources");
valid = 1;
if (old_size != size) {
- /* update end_bus_number */
- cfg->end_bus_number = cfg->start_bus_number + ((size>>20) - 1);
- printk(KERN_NOTICE "PCI: updated MCFG configuration %d: base %lx "
- "segment %hu buses %u - %u\n",
- i, (unsigned long)cfg->address, cfg->pci_segment,
- (unsigned int)cfg->start_bus_number,
- (unsigned int)cfg->end_bus_number);
+ /* update end_bus */
+ cfg->end_bus = cfg->start_bus + ((size>>20) - 1);
+ num_buses = cfg->end_bus - cfg->start_bus + 1;
+ cfg->res.end = cfg->res.start +
+ PCI_MMCFG_BUS_OFFSET(num_buses) - 1;
+ snprintf(cfg->name, PCI_MMCFG_RESOURCE_NAME_LEN,
+ "PCI MMCONFIG %04x [bus %02x-%02x]",
+ cfg->segment, cfg->start_bus, cfg->end_bus);
+ printk(KERN_INFO PREFIX
+ "MMCONFIG for %04x [bus%02x-%02x] "
+ "at %pR (base %#lx) (size reduced!)\n",
+ cfg->segment, cfg->start_bus, cfg->end_bus,
+ &cfg->res, (unsigned long) cfg->address);
}
}
@@ -471,45 +482,26 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved,
static void __init pci_mmcfg_reject_broken(int early)
{
- typeof(pci_mmcfg_config[0]) *cfg;
- int i;
+ struct pci_mmcfg_region *cfg;
- if ((pci_mmcfg_config_num == 0) ||
- (pci_mmcfg_config == NULL) ||
- (pci_mmcfg_config[0].address == 0))
- return;
-
- for (i = 0; i < pci_mmcfg_config_num; i++) {
+ list_for_each_entry(cfg, &pci_mmcfg_list, list) {
int valid = 0;
- u64 addr, size;
-
- cfg = &pci_mmcfg_config[i];
- addr = cfg->start_bus_number;
- addr <<= 20;
- addr += cfg->address;
- size = cfg->end_bus_number + 1 - cfg->start_bus_number;
- size <<= 20;
- printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx "
- "segment %hu buses %u - %u\n",
- i, (unsigned long)cfg->address, cfg->pci_segment,
- (unsigned int)cfg->start_bus_number,
- (unsigned int)cfg->end_bus_number);
if (!early && !acpi_disabled)
- valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0);
+ valid = is_mmconf_reserved(is_acpi_reserved, cfg, 0);
if (valid)
continue;
if (!early)
- printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not"
- " reserved in ACPI motherboard resources\n",
- cfg->address);
+ printk(KERN_ERR FW_BUG PREFIX
+ "MMCONFIG at %pR not reserved in "
+ "ACPI motherboard resources\n", &cfg->res);
/* Don't try to do this check unless configuration
type 1 is available. how about type 2 ?*/
if (raw_pci_ops)
- valid = is_mmconf_reserved(e820_all_mapped, addr, size, i, cfg, 1);
+ valid = is_mmconf_reserved(e820_all_mapped, cfg, 1);
if (!valid)
goto reject;
@@ -518,34 +510,41 @@ static void __init pci_mmcfg_reject_broken(int early)
return;
reject:
- printk(KERN_INFO "PCI: Not using MMCONFIG.\n");
- pci_mmcfg_arch_free();
- kfree(pci_mmcfg_config);
- pci_mmcfg_config = NULL;
- pci_mmcfg_config_num = 0;
+ printk(KERN_INFO PREFIX "not using MMCONFIG\n");
+ free_all_mmcfg();
}
static int __initdata known_bridge;
-static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
+static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
+ struct acpi_mcfg_allocation *cfg)
+{
+ int year;
-/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
-struct acpi_mcfg_allocation *pci_mmcfg_config;
-int pci_mmcfg_config_num;
+ if (cfg->address < 0xFFFFFFFF)
+ return 0;
-static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
-{
if (!strcmp(mcfg->header.oem_id, "SGI"))
- acpi_mcfg_64bit_base_addr = TRUE;
+ return 0;
- return 0;
+ if (mcfg->header.revision >= 1) {
+ if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) &&
+ year >= 2010)
+ return 0;
+ }
+
+ printk(KERN_ERR PREFIX "MCFG region for %04x [bus %02x-%02x] at %#llx "
+ "is above 4GB, ignored\n", cfg->pci_segment,
+ cfg->start_bus_number, cfg->end_bus_number, cfg->address);
+ return -EINVAL;
}
static int __init pci_parse_mcfg(struct acpi_table_header *header)
{
struct acpi_table_mcfg *mcfg;
+ struct acpi_mcfg_allocation *cfg_table, *cfg;
unsigned long i;
- int config_size;
+ int entries;
if (!header)
return -EINVAL;
@@ -553,38 +552,33 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header)
mcfg = (struct acpi_table_mcfg *)header;
/* how many config structures do we have */
- pci_mmcfg_config_num = 0;
+ free_all_mmcfg();
+ entries = 0;
i = header->length - sizeof(struct acpi_table_mcfg);
while (i >= sizeof(struct acpi_mcfg_allocation)) {
- ++pci_mmcfg_config_num;
+ entries++;
i -= sizeof(struct acpi_mcfg_allocation);
};
- if (pci_mmcfg_config_num == 0) {
+ if (entries == 0) {
printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
return -ENODEV;
}
- config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config);
- pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL);
- if (!pci_mmcfg_config) {
- printk(KERN_WARNING PREFIX
- "No memory for MCFG config tables\n");
- return -ENOMEM;
- }
-
- memcpy(pci_mmcfg_config, &mcfg[1], config_size);
-
- acpi_mcfg_oem_check(mcfg);
-
- for (i = 0; i < pci_mmcfg_config_num; ++i) {
- if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) &&
- !acpi_mcfg_64bit_base_addr) {
- printk(KERN_ERR PREFIX
- "MMCONFIG not in low 4GB of memory\n");
- kfree(pci_mmcfg_config);
- pci_mmcfg_config_num = 0;
+ cfg_table = (struct acpi_mcfg_allocation *) &mcfg[1];
+ for (i = 0; i < entries; i++) {
+ cfg = &cfg_table[i];
+ if (acpi_mcfg_check_entry(mcfg, cfg)) {
+ free_all_mmcfg();
return -ENODEV;
}
+
+ if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number,
+ cfg->end_bus_number, cfg->address) == NULL) {
+ printk(KERN_WARNING PREFIX
+ "no memory for MCFG entries\n");
+ free_all_mmcfg();
+ return -ENOMEM;
+ }
}
return 0;
@@ -614,9 +608,7 @@ static void __init __pci_mmcfg_init(int early)
pci_mmcfg_reject_broken(early);
- if ((pci_mmcfg_config_num == 0) ||
- (pci_mmcfg_config == NULL) ||
- (pci_mmcfg_config[0].address == 0))
+ if (list_empty(&pci_mmcfg_list))
return;
if (pci_mmcfg_arch_init())
@@ -648,9 +640,7 @@ static int __init pci_mmcfg_late_insert_resources(void)
*/
if ((pci_mmcfg_resources_inserted == 1) ||
(pci_probe & PCI_PROBE_MMCONF) == 0 ||
- (pci_mmcfg_config_num == 0) ||
- (pci_mmcfg_config == NULL) ||
- (pci_mmcfg_config[0].address == 0))
+ list_empty(&pci_mmcfg_list))
return 1;
/*
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index f10a7e94a84c..90d5fd476ed4 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -27,18 +27,10 @@ static int mmcfg_last_accessed_cpu;
*/
static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn)
{
- struct acpi_mcfg_allocation *cfg;
- int cfg_num;
-
- for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) {
- cfg = &pci_mmcfg_config[cfg_num];
- if (cfg->pci_segment == seg &&
- (cfg->start_bus_number <= bus) &&
- (cfg->end_bus_number >= bus))
- return cfg->address;
- }
+ struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus);
- /* Fall back to type 0 */
+ if (cfg)
+ return cfg->address;
return 0;
}
@@ -47,7 +39,7 @@ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn)
*/
static void pci_exp_set_dev_base(unsigned int base, int bus, int devfn)
{
- u32 dev_base = base | (bus << 20) | (devfn << 12);
+ u32 dev_base = base | PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12);
int cpu = smp_processor_id();
if (dev_base != mmcfg_last_accessed_device ||
cpu != mmcfg_last_accessed_cpu) {
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index 94349f8b2f96..e783841bd1d7 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -12,38 +12,15 @@
#include <asm/e820.h>
#include <asm/pci_x86.h>
-/* Static virtual mapping of the MMCONFIG aperture */
-struct mmcfg_virt {
- struct acpi_mcfg_allocation *cfg;
- char __iomem *virt;
-};
-static struct mmcfg_virt *pci_mmcfg_virt;
-
-static char __iomem *get_virt(unsigned int seg, unsigned bus)
-{
- struct acpi_mcfg_allocation *cfg;
- int cfg_num;
-
- for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) {
- cfg = pci_mmcfg_virt[cfg_num].cfg;
- if (cfg->pci_segment == seg &&
- (cfg->start_bus_number <= bus) &&
- (cfg->end_bus_number >= bus))
- return pci_mmcfg_virt[cfg_num].virt;
- }
-
- /* Fall back to type 0 */
- return NULL;
-}
+#define PREFIX "PCI: "
static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn)
{
- char __iomem *addr;
+ struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus);
- addr = get_virt(seg, bus);
- if (!addr)
- return NULL;
- return addr + ((bus << 20) | (devfn << 12));
+ if (cfg && cfg->virt)
+ return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12));
+ return NULL;
}
static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
@@ -109,42 +86,30 @@ static struct pci_raw_ops pci_mmcfg = {
.write = pci_mmcfg_write,
};
-static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg)
+static void __iomem * __init mcfg_ioremap(struct pci_mmcfg_region *cfg)
{
void __iomem *addr;
u64 start, size;
+ int num_buses;
- start = cfg->start_bus_number;
- start <<= 20;
- start += cfg->address;
- size = cfg->end_bus_number + 1 - cfg->start_bus_number;
- size <<= 20;
+ start = cfg->address + PCI_MMCFG_BUS_OFFSET(cfg->start_bus);
+ num_buses = cfg->end_bus - cfg->start_bus + 1;
+ size = PCI_MMCFG_BUS_OFFSET(num_buses);
addr = ioremap_nocache(start, size);
- if (addr) {
- printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n",
- start, start + size - 1);
- addr -= cfg->start_bus_number << 20;
- }
+ if (addr)
+ addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus);
return addr;
}
int __init pci_mmcfg_arch_init(void)
{
- int i;
- pci_mmcfg_virt = kzalloc(sizeof(*pci_mmcfg_virt) *
- pci_mmcfg_config_num, GFP_KERNEL);
- if (pci_mmcfg_virt == NULL) {
- printk(KERN_ERR "PCI: Can not allocate memory for mmconfig structures\n");
- return 0;
- }
+ struct pci_mmcfg_region *cfg;
- for (i = 0; i < pci_mmcfg_config_num; ++i) {
- pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i];
- pci_mmcfg_virt[i].virt = mcfg_ioremap(&pci_mmcfg_config[i]);
- if (!pci_mmcfg_virt[i].virt) {
- printk(KERN_ERR "PCI: Cannot map mmconfig aperture for "
- "segment %d\n",
- pci_mmcfg_config[i].pci_segment);
+ list_for_each_entry(cfg, &pci_mmcfg_list, list) {
+ cfg->virt = mcfg_ioremap(cfg);
+ if (!cfg->virt) {
+ printk(KERN_ERR PREFIX "can't map MMCONFIG at %pR\n",
+ &cfg->res);
pci_mmcfg_arch_free();
return 0;
}
@@ -155,19 +120,12 @@ int __init pci_mmcfg_arch_init(void)
void __init pci_mmcfg_arch_free(void)
{
- int i;
-
- if (pci_mmcfg_virt == NULL)
- return;
+ struct pci_mmcfg_region *cfg;
- for (i = 0; i < pci_mmcfg_config_num; ++i) {
- if (pci_mmcfg_virt[i].virt) {
- iounmap(pci_mmcfg_virt[i].virt + (pci_mmcfg_virt[i].cfg->start_bus_number << 20));
- pci_mmcfg_virt[i].virt = NULL;
- pci_mmcfg_virt[i].cfg = NULL;
+ list_for_each_entry(cfg, &pci_mmcfg_list, list) {
+ if (cfg->virt) {
+ iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus));
+ cfg->virt = NULL;
}
}
-
- kfree(pci_mmcfg_virt);
- pci_mmcfg_virt = NULL;
}
diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk
index 0d13cd9fdcff..5bbb5a33f220 100644
--- a/arch/x86/tools/chkobjdump.awk
+++ b/arch/x86/tools/chkobjdump.awk
@@ -9,7 +9,7 @@ BEGIN {
}
/^GNU/ {
- split($4, ver, ".");
+ split($3, ver, ".");
if (ver[1] > od_ver ||
(ver[1] == od_ver && ver[2] >= od_sver)) {
exit 1;
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index e34e92a28eb6..eaf11f52fc0b 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -6,8 +6,6 @@
# Awk implementation sanity check
function check_awk_implement() {
- if (!match("abc", "[[:lower:]]+"))
- return "Your awk doesn't support charactor-class."
if (sprintf("%x", 0) != "0")
return "Your awk has a printf-format problem."
return ""
@@ -44,12 +42,12 @@ BEGIN {
delete gtable
delete atable
- opnd_expr = "^[[:alpha:]/]"
+ opnd_expr = "^[A-Za-z/]"
ext_expr = "^\\("
sep_expr = "^\\|$"
- group_expr = "^Grp[[:alnum:]]+"
+ group_expr = "^Grp[0-9A-Za-z]+"
- imm_expr = "^[IJAO][[:lower:]]"
+ imm_expr = "^[IJAO][a-z]"
imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
@@ -62,7 +60,7 @@ BEGIN {
imm_flag["Ob"] = "INAT_MOFFSET"
imm_flag["Ov"] = "INAT_MOFFSET"
- modrm_expr = "^([CDEGMNPQRSUVW/][[:lower:]]+|NTA|T[012])"
+ modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
force64_expr = "\\([df]64\\)"
rex_expr = "^REX(\\.[XRWB]+)*"
fpu_expr = "^ESC" # TODO
@@ -226,12 +224,12 @@ function add_flags(old,new) {
}
# convert operands to flags.
-function convert_operands(opnd, i,imm,mod)
+function convert_operands(count,opnd, i,j,imm,mod)
{
imm = null
mod = null
- for (i in opnd) {
- i = opnd[i]
+ for (j = 1; j <= count; j++) {
+ i = opnd[j]
if (match(i, imm_expr) == 1) {
if (!imm_flag[i])
semantic_error("Unknown imm opnd: " i)
@@ -282,8 +280,8 @@ function convert_operands(opnd, i,imm,mod)
# parse one opcode
if (match($i, opnd_expr)) {
opnd = $i
- split($(i++), opnds, ",")
- flags = convert_operands(opnds)
+ count = split($(i++), opnds, ",")
+ flags = convert_operands(count, opnds)
}
if (match($i, ext_expr))
ext = $(i++)
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c
index d8214dc03fa7..bee8d6ac2691 100644
--- a/arch/x86/tools/test_get_len.c
+++ b/arch/x86/tools/test_get_len.c
@@ -113,7 +113,7 @@ int main(int argc, char **argv)
char line[BUFSIZE], sym[BUFSIZE] = "<unknown>";
unsigned char insn_buf[16];
struct insn insn;
- int insns = 0, c;
+ int insns = 0;
int warnings = 0;
parse_args(argc, argv);
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 58bc00f68b12..02b442e92007 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -393,7 +393,6 @@ static ctl_table abi_table2[] = {
static ctl_table abi_root_table2[] = {
{
- .ctl_name = CTL_ABI,
.procname = "abi",
.mode = 0555,
.child = abi_table2
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index dfbf70e65860..2b26dd5930c6 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -27,7 +27,9 @@
#include <linux/page-flags.h>
#include <linux/highmem.h>
#include <linux/console.h>
+#include <linux/pci.h>
+#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <xen/interface/version.h>
#include <xen/interface/physdev.h>
@@ -138,24 +140,23 @@ static void xen_vcpu_setup(int cpu)
*/
void xen_vcpu_restore(void)
{
- if (have_vcpu_info_placement) {
- int cpu;
+ int cpu;
- for_each_online_cpu(cpu) {
- bool other_cpu = (cpu != smp_processor_id());
+ for_each_online_cpu(cpu) {
+ bool other_cpu = (cpu != smp_processor_id());
- if (other_cpu &&
- HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
- BUG();
+ if (other_cpu &&
+ HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
+ BUG();
- xen_vcpu_setup(cpu);
+ xen_setup_runstate_info(cpu);
- if (other_cpu &&
- HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
- BUG();
- }
+ if (have_vcpu_info_placement)
+ xen_vcpu_setup(cpu);
- BUG_ON(!have_vcpu_info_placement);
+ if (other_cpu &&
+ HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
+ BUG();
}
}
@@ -1093,10 +1094,8 @@ asmlinkage void __init xen_start_kernel(void)
__supported_pte_mask |= _PAGE_IOMAP;
-#ifdef CONFIG_X86_64
/* Work out if we support NX */
- check_efer();
-#endif
+ x86_configure_nx();
xen_setup_features();
@@ -1178,10 +1177,16 @@ asmlinkage void __init xen_start_kernel(void)
add_preferred_console("xenboot", 0, NULL);
add_preferred_console("tty", 0, NULL);
add_preferred_console("hvc", 0, NULL);
+ } else {
+ /* Make sure ACS will be enabled */
+ pci_request_acs();
}
+
xen_raw_console_write("about to get started...\n");
+ xen_setup_runstate_info(0);
+
/* Start the world */
#ifdef CONFIG_X86_32
i386_start_kernel();
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3bf7b1d250ce..bf4cd6bfe959 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -185,7 +185,7 @@ static inline unsigned p2m_index(unsigned long pfn)
}
/* Build the parallel p2m_top_mfn structures */
-static void __init xen_build_mfn_list_list(void)
+void xen_build_mfn_list_list(void)
{
unsigned pfn, idx;
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index fe03eeed7b48..563d20504988 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -35,10 +35,10 @@
cpumask_var_t xen_cpu_initialized_map;
-static DEFINE_PER_CPU(int, resched_irq);
-static DEFINE_PER_CPU(int, callfunc_irq);
-static DEFINE_PER_CPU(int, callfuncsingle_irq);
-static DEFINE_PER_CPU(int, debug_irq) = -1;
+static DEFINE_PER_CPU(int, xen_resched_irq);
+static DEFINE_PER_CPU(int, xen_callfunc_irq);
+static DEFINE_PER_CPU(int, xen_callfuncsingle_irq);
+static DEFINE_PER_CPU(int, xen_debug_irq) = -1;
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
@@ -73,7 +73,7 @@ static __cpuinit void cpu_bringup(void)
xen_setup_cpu_clockevents();
- cpu_set(cpu, cpu_online_map);
+ set_cpu_online(cpu, true);
percpu_write(cpu_state, CPU_ONLINE);
wmb();
@@ -103,7 +103,7 @@ static int xen_smp_intr_init(unsigned int cpu)
NULL);
if (rc < 0)
goto fail;
- per_cpu(resched_irq, cpu) = rc;
+ per_cpu(xen_resched_irq, cpu) = rc;
callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
@@ -114,7 +114,7 @@ static int xen_smp_intr_init(unsigned int cpu)
NULL);
if (rc < 0)
goto fail;
- per_cpu(callfunc_irq, cpu) = rc;
+ per_cpu(xen_callfunc_irq, cpu) = rc;
debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
@@ -122,7 +122,7 @@ static int xen_smp_intr_init(unsigned int cpu)
debug_name, NULL);
if (rc < 0)
goto fail;
- per_cpu(debug_irq, cpu) = rc;
+ per_cpu(xen_debug_irq, cpu) = rc;
callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
@@ -133,19 +133,20 @@ static int xen_smp_intr_init(unsigned int cpu)
NULL);
if (rc < 0)
goto fail;
- per_cpu(callfuncsingle_irq, cpu) = rc;
+ per_cpu(xen_callfuncsingle_irq, cpu) = rc;
return 0;
fail:
- if (per_cpu(resched_irq, cpu) >= 0)
- unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
- if (per_cpu(callfunc_irq, cpu) >= 0)
- unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
- if (per_cpu(debug_irq, cpu) >= 0)
- unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
- if (per_cpu(callfuncsingle_irq, cpu) >= 0)
- unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+ if (per_cpu(xen_resched_irq, cpu) >= 0)
+ unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
+ if (per_cpu(xen_callfunc_irq, cpu) >= 0)
+ unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
+ if (per_cpu(xen_debug_irq, cpu) >= 0)
+ unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
+ if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0)
+ unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu),
+ NULL);
return rc;
}
@@ -295,6 +296,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
(unsigned long)task_stack_page(idle) -
KERNEL_STACK_OFFSET + THREAD_SIZE;
#endif
+ xen_setup_runstate_info(cpu);
xen_setup_timer(cpu);
xen_init_lock_cpu(cpu);
@@ -348,10 +350,10 @@ static void xen_cpu_die(unsigned int cpu)
current->state = TASK_UNINTERRUPTIBLE;
schedule_timeout(HZ/10);
}
- unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
xen_uninit_lock_cpu(cpu);
xen_teardown_timer(cpu);
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 36a5141108df..24ded31b5aec 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -120,14 +120,14 @@ struct xen_spinlock {
unsigned short spinners; /* count of waiting cpus */
};
-static int xen_spin_is_locked(struct raw_spinlock *lock)
+static int xen_spin_is_locked(struct arch_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
return xl->lock != 0;
}
-static int xen_spin_is_contended(struct raw_spinlock *lock)
+static int xen_spin_is_contended(struct arch_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
@@ -136,7 +136,7 @@ static int xen_spin_is_contended(struct raw_spinlock *lock)
return xl->spinners != 0;
}
-static int xen_spin_trylock(struct raw_spinlock *lock)
+static int xen_spin_trylock(struct arch_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
u8 old = 1;
@@ -181,7 +181,7 @@ static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock
__get_cpu_var(lock_spinners) = prev;
}
-static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enable)
+static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
struct xen_spinlock *prev;
@@ -254,7 +254,7 @@ out:
return ret;
}
-static inline void __xen_spin_lock(struct raw_spinlock *lock, bool irq_enable)
+static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
unsigned timeout;
@@ -291,12 +291,12 @@ static inline void __xen_spin_lock(struct raw_spinlock *lock, bool irq_enable)
spin_time_accum_total(start_spin);
}
-static void xen_spin_lock(struct raw_spinlock *lock)
+static void xen_spin_lock(struct arch_spinlock *lock)
{
__xen_spin_lock(lock, false);
}
-static void xen_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags)
+static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
{
__xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
}
@@ -317,7 +317,7 @@ static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
}
}
-static void xen_spin_unlock(struct raw_spinlock *lock)
+static void xen_spin_unlock(struct arch_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 95be7b434724..987267f79bf5 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -1,4 +1,5 @@
#include <linux/types.h>
+#include <linux/clockchips.h>
#include <xen/interface/xen.h>
#include <xen/grant_table.h>
@@ -27,6 +28,8 @@ void xen_pre_suspend(void)
void xen_post_suspend(int suspend_cancelled)
{
+ xen_build_mfn_list_list();
+
xen_setup_shared_info();
if (suspend_cancelled) {
@@ -44,7 +47,19 @@ void xen_post_suspend(int suspend_cancelled)
}
+static void xen_vcpu_notify_restore(void *data)
+{
+ unsigned long reason = (unsigned long)data;
+
+ /* Boot processor notified via generic timekeeping_resume() */
+ if ( smp_processor_id() == 0)
+ return;
+
+ clockevents_notify(reason, NULL);
+}
+
void xen_arch_resume(void)
{
- /* nothing */
+ smp_call_function(xen_vcpu_notify_restore,
+ (void *)CLOCK_EVT_NOTIFY_RESUME, 1);
}
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 0a5aa44299a5..0d3f07cd1b5f 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -31,14 +31,14 @@
#define NS_PER_TICK (1000000000LL / HZ)
/* runstate info updated by Xen */
-static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
+static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);
/* snapshots of runstate info */
-static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate_snapshot);
+static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
/* unused ns of stolen and blocked time */
-static DEFINE_PER_CPU(u64, residual_stolen);
-static DEFINE_PER_CPU(u64, residual_blocked);
+static DEFINE_PER_CPU(u64, xen_residual_stolen);
+static DEFINE_PER_CPU(u64, xen_residual_blocked);
/* return an consistent snapshot of 64-bit time/counter value */
static u64 get64(const u64 *p)
@@ -79,7 +79,7 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res)
BUG_ON(preemptible());
- state = &__get_cpu_var(runstate);
+ state = &__get_cpu_var(xen_runstate);
/*
* The runstate info is always updated by the hypervisor on
@@ -97,14 +97,14 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res)
/* return true when a vcpu could run but has no real cpu to run on */
bool xen_vcpu_stolen(int vcpu)
{
- return per_cpu(runstate, vcpu).state == RUNSTATE_runnable;
+ return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
}
-static void setup_runstate_info(int cpu)
+void xen_setup_runstate_info(int cpu)
{
struct vcpu_register_runstate_memory_area area;
- area.addr.v = &per_cpu(runstate, cpu);
+ area.addr.v = &per_cpu(xen_runstate, cpu);
if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
cpu, &area))
@@ -122,7 +122,7 @@ static void do_stolen_accounting(void)
WARN_ON(state.state != RUNSTATE_running);
- snap = &__get_cpu_var(runstate_snapshot);
+ snap = &__get_cpu_var(xen_runstate_snapshot);
/* work out how much time the VCPU has not been runn*ing* */
blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
@@ -133,24 +133,24 @@ static void do_stolen_accounting(void)
/* Add the appropriate number of ticks of stolen time,
including any left-overs from last time. */
- stolen = runnable + offline + __get_cpu_var(residual_stolen);
+ stolen = runnable + offline + __get_cpu_var(xen_residual_stolen);
if (stolen < 0)
stolen = 0;
ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
- __get_cpu_var(residual_stolen) = stolen;
+ __get_cpu_var(xen_residual_stolen) = stolen;
account_steal_ticks(ticks);
/* Add the appropriate number of ticks of blocked time,
including any left-overs from last time. */
- blocked += __get_cpu_var(residual_blocked);
+ blocked += __get_cpu_var(xen_residual_blocked);
if (blocked < 0)
blocked = 0;
ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
- __get_cpu_var(residual_blocked) = blocked;
+ __get_cpu_var(xen_residual_blocked) = blocked;
account_idle_ticks(ticks);
}
@@ -434,7 +434,7 @@ void xen_setup_timer(int cpu)
name = "<timer kasprintf failed>";
irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
- IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+ IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER,
name, NULL);
evt = &per_cpu(xen_clock_events, cpu);
@@ -442,8 +442,6 @@ void xen_setup_timer(int cpu)
evt->cpumask = cpumask_of(cpu);
evt->irq = irq;
-
- setup_runstate_info(cpu);
}
void xen_teardown_timer(int cpu)
@@ -494,6 +492,7 @@ __init void xen_time_init(void)
setup_force_cpu_cap(X86_FEATURE_TSC);
+ xen_setup_runstate_info(cpu);
xen_setup_timer(cpu);
xen_setup_cpu_clockevents();
}
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 02f496a8dbaa..53adefda4275 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -96,7 +96,7 @@ ENTRY(xen_sysret32)
pushq $__USER32_CS
pushq %rcx
- pushq $VGCF_in_syscall
+ pushq $0
1: jmp hypercall_iret
ENDPATCH(xen_sysret32)
RELOC(xen_sysret32, 1b+1)
@@ -151,7 +151,7 @@ ENTRY(xen_syscall32_target)
ENTRY(xen_sysenter_target)
lea 16(%rsp), %rsp /* strip %rcx, %r11 */
mov $-ENOSYS, %rax
- pushq $VGCF_in_syscall
+ pushq $0
jmp hypercall_iret
ENDPROC(xen_syscall32_target)
ENDPROC(xen_sysenter_target)
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 355fa6b99c9c..f9153a300bce 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -25,6 +25,7 @@ extern struct shared_info *HYPERVISOR_shared_info;
void xen_setup_mfn_list_list(void);
void xen_setup_shared_info(void);
+void xen_build_mfn_list_list(void);
void xen_setup_machphys_mapping(void);
pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
void xen_ident_map_ISA(void);
@@ -41,6 +42,7 @@ void __init xen_build_dynamic_phys_to_machine(void);
void xen_init_irq_ops(void);
void xen_setup_timer(int cpu);
+void xen_setup_runstate_info(int cpu);
void xen_teardown_timer(int cpu);
cycle_t xen_clocksource_read(void);
void xen_setup_cpu_clockevents(void);