summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig8
-rw-r--r--arch/x86/Kconfig.debug2
-rw-r--r--arch/x86/boot/header.S2
-rw-r--r--arch/x86/configs/x86_64_defconfig4
-rw-r--r--arch/x86/include/asm/alternative-asm.h1
-rw-r--r--arch/x86/include/asm/alternative.h4
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/device.h2
-rw-r--r--arch/x86/include/asm/dma-mapping.h2
-rw-r--r--arch/x86/include/asm/hw_irq.h2
-rw-r--r--arch/x86/include/asm/hyperv.h1
-rw-r--r--arch/x86/include/asm/irq_remapping.h6
-rw-r--r--arch/x86/include/asm/xen/page.h6
-rw-r--r--arch/x86/kernel/amd_gart_64.c2
-rw-r--r--arch/x86/kernel/apic/apic.c33
-rw-r--r--arch/x86/kernel/apic/io_apic.c284
-rw-r--r--arch/x86/kernel/apm_32.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c10
-rw-r--r--arch/x86/kernel/kprobes.c4
-rw-r--r--arch/x86/kernel/pci-dma.c4
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/rtc.c23
-rw-r--r--arch/x86/kernel/vsyscall_64.c2
-rw-r--r--arch/x86/kvm/emulate.c2
-rw-r--r--arch/x86/kvm/mmu.c3
-rw-r--r--arch/x86/mm/init.c3
-rw-r--r--arch/x86/mm/mmio-mod.c1
-rw-r--r--arch/x86/oprofile/nmi_int.c4
-rw-r--r--arch/x86/pci/acpi.c11
-rw-r--r--arch/x86/pci/xen.c32
-rw-r--r--arch/x86/platform/mrst/mrst.c25
-rw-r--r--arch/x86/platform/mrst/vrtc.c9
-rw-r--r--arch/x86/xen/Kconfig11
-rw-r--r--arch/x86/xen/enlighten.c1
-rw-r--r--arch/x86/xen/mmu.c58
-rw-r--r--arch/x86/xen/p2m.c128
-rw-r--r--arch/x86/xen/setup.c282
-rw-r--r--arch/x86/xen/smp.c1
-rw-r--r--arch/x86/xen/time.c5
39 files changed, 504 insertions, 480 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6a47bb22657f..9037289617ac 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -130,7 +130,7 @@ config SBUS
bool
config NEED_DMA_MAP_STATE
- def_bool (X86_64 || DMAR || DMA_API_DEBUG)
+ def_bool (X86_64 || INTEL_IOMMU || DMA_API_DEBUG)
config NEED_SG_DMA_LENGTH
def_bool y
@@ -220,7 +220,7 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
config HAVE_INTEL_TXT
def_bool y
- depends on EXPERIMENTAL && DMAR && ACPI
+ depends on EXPERIMENTAL && INTEL_IOMMU && ACPI
config X86_32_SMP
def_bool y
@@ -279,7 +279,7 @@ config SMP
Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
Management" code will be disabled if you say Y here.
- See also <file:Documentation/i386/IO-APIC.txt>,
+ See also <file:Documentation/x86/i386/IO-APIC.txt>,
<file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
<http://www.tldp.org/docs.html#howto>.
@@ -287,7 +287,7 @@ config SMP
config X86_X2APIC
bool "Support x2apic"
- depends on X86_LOCAL_APIC && X86_64 && INTR_REMAP
+ depends on X86_LOCAL_APIC && X86_64 && IRQ_REMAP
---help---
This enables x2apic support on CPUs that have this feature.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index c0f8a5c88910..bf56e1793272 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -139,7 +139,7 @@ config IOMMU_DEBUG
code. When you use it make sure you have a big enough
IOMMU/AGP aperture. Most of the options enabled by this can
be set more finegrained using the iommu= command line
- options. See Documentation/x86_64/boot-options.txt for more
+ options. See Documentation/x86/x86_64/boot-options.txt for more
details.
config IOMMU_STRESS
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 93e689f4bd86..bdb4d458ec8c 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -129,7 +129,7 @@ start_sys_seg: .word SYSSEG # obsolete and meaningless, but just
type_of_loader: .byte 0 # 0 means ancient bootloader, newer
# bootloaders know to change this.
- # See Documentation/i386/boot.txt for
+ # See Documentation/x86/boot.txt for
# assigned ids
# flags, unused bits must be zero (RFU) bit within loadflags
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 22a0dc8e51dd..058a35b8286c 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -67,8 +67,8 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_X86_ACPI_CPUFREQ=y
CONFIG_PCI_MMCONFIG=y
-CONFIG_DMAR=y
-# CONFIG_DMAR_DEFAULT_ON is not set
+CONFIG_INTEL_IOMMU=y
+# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
CONFIG_PCIEPORTBUS=y
CONFIG_PCCARD=y
CONFIG_YENTA=y
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 4554cc6fb96a..091508b533b4 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -16,7 +16,6 @@
#endif
.macro altinstruction_entry orig alt feature orig_len alt_len
- .align 8
.long \orig - .
.long \alt - .
.word \feature
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 23fb6d79f209..37ad100a2210 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -48,9 +48,6 @@ struct alt_instr {
u16 cpuid; /* cpuid bit set for replacement */
u8 instrlen; /* length of original instruction */
u8 replacementlen; /* length of new instruction, <= instrlen */
-#ifdef CONFIG_X86_64
- u32 pad2;
-#endif
};
extern void alternative_instructions(void);
@@ -83,7 +80,6 @@ static inline int alternatives_text_reserved(void *start, void *end)
\
"661:\n\t" oldinstr "\n662:\n" \
".section .altinstructions,\"a\"\n" \
- _ASM_ALIGN "\n" \
" .long 661b - .\n" /* label */ \
" .long 663f - .\n" /* new instruction */ \
" .word " __stringify(feature) "\n" /* feature bit */ \
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 4258aac99a6e..88b23a43f340 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -332,7 +332,6 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
asm goto("1: jmp %l[t_no]\n"
"2:\n"
".section .altinstructions,\"a\"\n"
- _ASM_ALIGN "\n"
" .long 1b - .\n"
" .long 0\n" /* no replacement */
" .word %P0\n" /* feature bit */
@@ -350,7 +349,6 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
asm volatile("1: movb $0,%0\n"
"2:\n"
".section .altinstructions,\"a\"\n"
- _ASM_ALIGN "\n"
" .long 1b - .\n"
" .long 3f - .\n"
" .word %P1\n" /* feature bit */
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 029f230ab637..63a2a03d7d51 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -8,7 +8,7 @@ struct dev_archdata {
#ifdef CONFIG_X86_64
struct dma_map_ops *dma_ops;
#endif
-#if defined(CONFIG_DMAR) || defined(CONFIG_AMD_IOMMU)
+#if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
void *iommu; /* hook for IOMMU specific extension */
#endif
};
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index d4c419f883a0..ed3065fd6314 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -2,7 +2,7 @@
#define _ASM_X86_DMA_MAPPING_H
/*
- * IOMMU interface. See Documentation/PCI/PCI-DMA-mapping.txt and
+ * IOMMU interface. See Documentation/DMA-API-HOWTO.txt and
* Documentation/DMA-API.txt for documentation.
*/
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 09199052060f..eb92a6ed2be7 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -119,7 +119,7 @@ struct irq_cfg {
cpumask_var_t old_domain;
u8 vector;
u8 move_in_progress : 1;
-#ifdef CONFIG_INTR_REMAP
+#ifdef CONFIG_IRQ_REMAP
struct irq_2_iommu irq_2_iommu;
#endif
};
diff --git a/arch/x86/include/asm/hyperv.h b/arch/x86/include/asm/hyperv.h
index 5df477ac3af7..b80420bcd09d 100644
--- a/arch/x86/include/asm/hyperv.h
+++ b/arch/x86/include/asm/hyperv.h
@@ -189,5 +189,6 @@
#define HV_STATUS_INVALID_HYPERCALL_CODE 2
#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
#define HV_STATUS_INVALID_ALIGNMENT 4
+#define HV_STATUS_INSUFFICIENT_BUFFERS 19
#endif
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 1c23360fb2d8..47d99934580f 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -3,7 +3,8 @@
#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
-#ifdef CONFIG_INTR_REMAP
+#ifdef CONFIG_IRQ_REMAP
+static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
static inline void prepare_irte(struct irte *irte, int vector,
unsigned int dest)
{
@@ -36,6 +37,9 @@ static inline bool irq_remapped(struct irq_cfg *cfg)
{
return false;
}
+static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
+{
+}
#endif
#endif /* _ASM_X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 7ff4669580cf..c34f96c2f7a0 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -12,6 +12,7 @@
#include <asm/pgtable.h>
#include <xen/interface/xen.h>
+#include <xen/grant_table.h>
#include <xen/features.h>
/* Xen machine address */
@@ -48,14 +49,11 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e);
extern int m2p_add_override(unsigned long mfn, struct page *page,
- bool clear_pte);
+ struct gnttab_map_grant_ref *kmap_op);
extern int m2p_remove_override(struct page *page, bool clear_pte);
extern struct page *m2p_find_override(unsigned long mfn);
extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
-#ifdef CONFIG_XEN_DEBUG_FS
-extern int p2m_dump_show(struct seq_file *m, void *v);
-#endif
static inline unsigned long pfn_to_mfn(unsigned long pfn)
{
unsigned long mfn;
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 8a439d364b94..b1e7c7f7a0af 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -5,7 +5,7 @@
* This allows to use PCI devices that only support 32bit addresses on systems
* with more than 4GB.
*
- * See Documentation/PCI/PCI-DMA-mapping.txt for the interface specification.
+ * See Documentation/DMA-API-HOWTO.txt for the interface specification.
*
* Copyright 2002 Andi Kleen, SuSE Labs.
* Subject to the GNU General Public License v2 only.
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 52fa56399a50..a2fd72e0ab35 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1437,27 +1437,21 @@ void enable_x2apic(void)
int __init enable_IR(void)
{
-#ifdef CONFIG_INTR_REMAP
+#ifdef CONFIG_IRQ_REMAP
if (!intr_remapping_supported()) {
pr_debug("intr-remapping not supported\n");
- return 0;
+ return -1;
}
if (!x2apic_preenabled && skip_ioapic_setup) {
pr_info("Skipped enabling intr-remap because of skipping "
"io-apic setup\n");
- return 0;
+ return -1;
}
- if (enable_intr_remapping(x2apic_supported()))
- return 0;
-
- pr_info("Enabled Interrupt-remapping\n");
-
- return 1;
-
+ return enable_intr_remapping();
#endif
- return 0;
+ return -1;
}
void __init enable_IR_x2apic(void)
@@ -1481,11 +1475,11 @@ void __init enable_IR_x2apic(void)
mask_ioapic_entries();
if (dmar_table_init_ret)
- ret = 0;
+ ret = -1;
else
ret = enable_IR();
- if (!ret) {
+ if (ret < 0) {
/* IR is required if there is APIC ID > 255 even when running
* under KVM
*/
@@ -1499,6 +1493,9 @@ void __init enable_IR_x2apic(void)
x2apic_force_phys();
}
+ if (ret == IRQ_REMAP_XAPIC_MODE)
+ goto nox2apic;
+
x2apic_enabled = 1;
if (x2apic_supported() && !x2apic_mode) {
@@ -1508,19 +1505,21 @@ void __init enable_IR_x2apic(void)
}
nox2apic:
- if (!ret) /* IR enabling failed */
+ if (ret < 0) /* IR enabling failed */
restore_ioapic_entries();
legacy_pic->restore_mask();
local_irq_restore(flags);
out:
- if (x2apic_enabled)
+ if (x2apic_enabled || !x2apic_supported())
return;
if (x2apic_preenabled)
panic("x2apic: enabled by BIOS but kernel init failed.");
- else if (cpu_has_x2apic)
- pr_info("Not enabling x2apic, Intr-remapping init failed.\n");
+ else if (ret == IRQ_REMAP_XAPIC_MODE)
+ pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
+ else if (ret < 0)
+ pr_info("x2apic not enabled, IRQ remapping init failed\n");
}
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8eb863e27ea6..229e19f3eb57 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -394,13 +394,21 @@ union entry_union {
struct IO_APIC_route_entry entry;
};
+static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin)
+{
+ union entry_union eu;
+
+ eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+ return eu.entry;
+}
+
static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
{
union entry_union eu;
unsigned long flags;
raw_spin_lock_irqsave(&ioapic_lock, flags);
- eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
- eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+ eu.entry = __ioapic_read_entry(apic, pin);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return eu.entry;
}
@@ -529,18 +537,6 @@ static void io_apic_modify_irq(struct irq_cfg *cfg,
__io_apic_modify_irq(entry, mask_and, mask_or, final);
}
-static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry)
-{
- __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER,
- IO_APIC_REDIR_MASKED, NULL);
-}
-
-static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
-{
- __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED,
- IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
-}
-
static void io_apic_sync(struct irq_pin_list *entry)
{
/*
@@ -585,6 +581,66 @@ static void unmask_ioapic_irq(struct irq_data *data)
unmask_ioapic(data->chip_data);
}
+/*
+ * IO-APIC versions below 0x20 don't support EOI register.
+ * For the record, here is the information about various versions:
+ * 0Xh 82489DX
+ * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
+ * 2Xh I/O(x)APIC which is PCI 2.2 Compliant
+ * 30h-FFh Reserved
+ *
+ * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
+ * version as 0x2. This is an error with documentation and these ICH chips
+ * use io-apic's of version 0x20.
+ *
+ * For IO-APIC's with EOI register, we use that to do an explicit EOI.
+ * Otherwise, we simulate the EOI message manually by changing the trigger
+ * mode to edge and then back to level, with RTE being masked during this.
+ */
+static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg)
+{
+ if (mpc_ioapic_ver(apic) >= 0x20) {
+ /*
+ * Intr-remapping uses pin number as the virtual vector
+ * in the RTE. Actual vector is programmed in
+ * intr-remapping table entry. Hence for the io-apic
+ * EOI we use the pin number.
+ */
+ if (cfg && irq_remapped(cfg))
+ io_apic_eoi(apic, pin);
+ else
+ io_apic_eoi(apic, vector);
+ } else {
+ struct IO_APIC_route_entry entry, entry1;
+
+ entry = entry1 = __ioapic_read_entry(apic, pin);
+
+ /*
+ * Mask the entry and change the trigger mode to edge.
+ */
+ entry1.mask = 1;
+ entry1.trigger = IOAPIC_EDGE;
+
+ __ioapic_write_entry(apic, pin, entry1);
+
+ /*
+ * Restore the previous level triggered entry.
+ */
+ __ioapic_write_entry(apic, pin, entry);
+ }
+}
+
+static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+ struct irq_pin_list *entry;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ for_each_irq_pin(entry, cfg->irq_2_pin)
+ __eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
struct IO_APIC_route_entry entry;
@@ -593,10 +649,44 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
entry = ioapic_read_entry(apic, pin);
if (entry.delivery_mode == dest_SMI)
return;
+
+ /*
+ * Make sure the entry is masked and re-read the contents to check
+ * if it is a level triggered pin and if the remote-IRR is set.
+ */
+ if (!entry.mask) {
+ entry.mask = 1;
+ ioapic_write_entry(apic, pin, entry);
+ entry = ioapic_read_entry(apic, pin);
+ }
+
+ if (entry.irr) {
+ unsigned long flags;
+
+ /*
+ * Make sure the trigger mode is set to level. Explicit EOI
+ * doesn't clear the remote-IRR if the trigger mode is not
+ * set to level.
+ */
+ if (!entry.trigger) {
+ entry.trigger = IOAPIC_LEVEL;
+ ioapic_write_entry(apic, pin, entry);
+ }
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ __eoi_ioapic_pin(apic, pin, entry.vector, NULL);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+
/*
- * Disable it in the IO-APIC irq-routing table:
+ * Clear the rest of the bits in the IO-APIC RTE except for the mask
+ * bit.
*/
ioapic_mask_entry(apic, pin);
+ entry = ioapic_read_entry(apic, pin);
+ if (entry.irr)
+ printk(KERN_ERR "Unable to reset IRR for apic: %d, pin :%d\n",
+ mpc_ioapic_id(apic), pin);
}
static void clear_IO_APIC (void)
@@ -1202,7 +1292,6 @@ void __setup_vector_irq(int cpu)
}
static struct irq_chip ioapic_chip;
-static struct irq_chip ir_ioapic_chip;
#ifdef CONFIG_X86_32
static inline int IO_APIC_irq_trigger(int irq)
@@ -1246,7 +1335,7 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
if (irq_remapped(cfg)) {
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- chip = &ir_ioapic_chip;
+ irq_remap_modify_chip_defaults(chip);
fasteoi = trigger != 0;
}
@@ -2255,7 +2344,7 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
return ret;
}
-#ifdef CONFIG_INTR_REMAP
+#ifdef CONFIG_IRQ_REMAP
/*
* Migrate the IO-APIC irq in the presence of intr-remapping.
@@ -2267,6 +2356,9 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
* updated vector information), by using a virtual vector (io-apic pin number).
* Real vector that is used for interrupting cpu will be coming from
* the interrupt-remapping table entry.
+ *
+ * As the migration is a simple atomic update of IRTE, the same mechanism
+ * is used to migrate MSI irq's in the presence of interrupt-remapping.
*/
static int
ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
@@ -2291,10 +2383,16 @@ ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
irte.dest_id = IRTE_DEST(dest);
/*
- * Modified the IRTE and flushes the Interrupt entry cache.
+ * Atomically updates the IRTE with the new destination, vector
+ * and flushes the interrupt entry cache.
*/
modify_irte(irq, &irte);
+ /*
+ * After this point, all the interrupts will start arriving
+ * at the new destination. So, time to cleanup the previous
+ * vector allocation.
+ */
if (cfg->move_in_progress)
send_cleanup_vector(cfg);
@@ -2407,48 +2505,6 @@ static void ack_apic_edge(struct irq_data *data)
atomic_t irq_mis_count;
-/*
- * IO-APIC versions below 0x20 don't support EOI register.
- * For the record, here is the information about various versions:
- * 0Xh 82489DX
- * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
- * 2Xh I/O(x)APIC which is PCI 2.2 Compliant
- * 30h-FFh Reserved
- *
- * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
- * version as 0x2. This is an error with documentation and these ICH chips
- * use io-apic's of version 0x20.
- *
- * For IO-APIC's with EOI register, we use that to do an explicit EOI.
- * Otherwise, we simulate the EOI message manually by changing the trigger
- * mode to edge and then back to level, with RTE being masked during this.
-*/
-static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
-{
- struct irq_pin_list *entry;
- unsigned long flags;
-
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- for_each_irq_pin(entry, cfg->irq_2_pin) {
- if (mpc_ioapic_ver(entry->apic) >= 0x20) {
- /*
- * Intr-remapping uses pin number as the virtual vector
- * in the RTE. Actual vector is programmed in
- * intr-remapping table entry. Hence for the io-apic
- * EOI we use the pin number.
- */
- if (irq_remapped(cfg))
- io_apic_eoi(entry->apic, entry->pin);
- else
- io_apic_eoi(entry->apic, cfg->vector);
- } else {
- __mask_and_edge_IO_APIC_irq(entry);
- __unmask_and_level_IO_APIC_irq(entry);
- }
- }
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
static void ack_apic_level(struct irq_data *data)
{
struct irq_cfg *cfg = data->chip_data;
@@ -2552,7 +2608,7 @@ static void ack_apic_level(struct irq_data *data)
}
}
-#ifdef CONFIG_INTR_REMAP
+#ifdef CONFIG_IRQ_REMAP
static void ir_ack_apic_edge(struct irq_data *data)
{
ack_APIC_irq();
@@ -2563,7 +2619,23 @@ static void ir_ack_apic_level(struct irq_data *data)
ack_APIC_irq();
eoi_ioapic_irq(data->irq, data->chip_data);
}
-#endif /* CONFIG_INTR_REMAP */
+
+static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
+{
+ seq_printf(p, " IR-%s", data->chip->name);
+}
+
+static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
+{
+ chip->irq_print_chip = ir_print_prefix;
+ chip->irq_ack = ir_ack_apic_edge;
+ chip->irq_eoi = ir_ack_apic_level;
+
+#ifdef CONFIG_SMP
+ chip->irq_set_affinity = ir_ioapic_set_affinity;
+#endif
+}
+#endif /* CONFIG_IRQ_REMAP */
static struct irq_chip ioapic_chip __read_mostly = {
.name = "IO-APIC",
@@ -2578,21 +2650,6 @@ static struct irq_chip ioapic_chip __read_mostly = {
.irq_retrigger = ioapic_retrigger_irq,
};
-static struct irq_chip ir_ioapic_chip __read_mostly = {
- .name = "IR-IO-APIC",
- .irq_startup = startup_ioapic_irq,
- .irq_mask = mask_ioapic_irq,
- .irq_unmask = unmask_ioapic_irq,
-#ifdef CONFIG_INTR_REMAP
- .irq_ack = ir_ack_apic_edge,
- .irq_eoi = ir_ack_apic_level,
-#ifdef CONFIG_SMP
- .irq_set_affinity = ir_ioapic_set_affinity,
-#endif
-#endif
- .irq_retrigger = ioapic_retrigger_irq,
-};
-
static inline void init_IO_APIC_traps(void)
{
struct irq_cfg *cfg;
@@ -3144,45 +3201,6 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
return 0;
}
-#ifdef CONFIG_INTR_REMAP
-/*
- * Migrate the MSI irq to another cpumask. This migration is
- * done in the process context using interrupt-remapping hardware.
- */
-static int
-ir_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
- bool force)
-{
- struct irq_cfg *cfg = data->chip_data;
- unsigned int dest, irq = data->irq;
- struct irte irte;
-
- if (get_irte(irq, &irte))
- return -1;
-
- if (__ioapic_set_affinity(data, mask, &dest))
- return -1;
-
- irte.vector = cfg->vector;
- irte.dest_id = IRTE_DEST(dest);
-
- /*
- * atomically update the IRTE with the new destination and vector.
- */
- modify_irte(irq, &irte);
-
- /*
- * After this point, all the interrupts will start arriving
- * at the new destination. So, time to cleanup the previous
- * vector allocation.
- */
- if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
-
- return 0;
-}
-
-#endif
#endif /* CONFIG_SMP */
/*
@@ -3200,19 +3218,6 @@ static struct irq_chip msi_chip = {
.irq_retrigger = ioapic_retrigger_irq,
};
-static struct irq_chip msi_ir_chip = {
- .name = "IR-PCI-MSI",
- .irq_unmask = unmask_msi_irq,
- .irq_mask = mask_msi_irq,
-#ifdef CONFIG_INTR_REMAP
- .irq_ack = ir_ack_apic_edge,
-#ifdef CONFIG_SMP
- .irq_set_affinity = ir_msi_set_affinity,
-#endif
-#endif
- .irq_retrigger = ioapic_retrigger_irq,
-};
-
/*
* Map the PCI dev to the corresponding remapping hardware unit
* and allocate 'nvec' consecutive interrupt-remapping table entries
@@ -3255,7 +3260,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
if (irq_remapped(irq_get_chip_data(irq))) {
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- chip = &msi_ir_chip;
+ irq_remap_modify_chip_defaults(chip);
}
irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
@@ -3328,7 +3333,7 @@ void native_teardown_msi_irq(unsigned int irq)
destroy_irq(irq);
}
-#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
+#ifdef CONFIG_DMAR_TABLE
#ifdef CONFIG_SMP
static int
dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
@@ -3409,19 +3414,6 @@ static int hpet_msi_set_affinity(struct irq_data *data,
#endif /* CONFIG_SMP */
-static struct irq_chip ir_hpet_msi_type = {
- .name = "IR-HPET_MSI",
- .irq_unmask = hpet_msi_unmask,
- .irq_mask = hpet_msi_mask,
-#ifdef CONFIG_INTR_REMAP
- .irq_ack = ir_ack_apic_edge,
-#ifdef CONFIG_SMP
- .irq_set_affinity = ir_msi_set_affinity,
-#endif
-#endif
- .irq_retrigger = ioapic_retrigger_irq,
-};
-
static struct irq_chip hpet_msi_type = {
.name = "HPET_MSI",
.irq_unmask = hpet_msi_unmask,
@@ -3458,7 +3450,7 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
hpet_msi_write(irq_get_handler_data(irq), &msg);
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
if (irq_remapped(irq_get_chip_data(irq)))
- chip = &ir_hpet_msi_type;
+ irq_remap_modify_chip_defaults(chip);
irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
return 0;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 0371c484bb8a..a46bd383953c 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -249,8 +249,6 @@ extern int (*console_blank_hook)(int);
#define APM_MINOR_DEV 134
/*
- * See Documentation/Config.help for the configuration options.
- *
* Various options can be changed at boot time as follows:
* (We allow underscores for compatibility with the modules code)
* apm=on/off enable/disable APM
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 8694ef56459d..38e49bc95ffc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -28,7 +28,7 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
* cmci_discover_lock protects against parallel discovery attempts
* which could race against each other.
*/
-static DEFINE_SPINLOCK(cmci_discover_lock);
+static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
#define CMCI_THRESHOLD 1
@@ -85,7 +85,7 @@ static void cmci_discover(int banks, int boot)
int hdr = 0;
int i;
- spin_lock_irqsave(&cmci_discover_lock, flags);
+ raw_spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) {
u64 val;
@@ -116,7 +116,7 @@ static void cmci_discover(int banks, int boot)
WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
}
}
- spin_unlock_irqrestore(&cmci_discover_lock, flags);
+ raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
if (hdr)
printk(KERN_CONT "\n");
}
@@ -150,7 +150,7 @@ void cmci_clear(void)
if (!cmci_supported(&banks))
return;
- spin_lock_irqsave(&cmci_discover_lock, flags);
+ raw_spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) {
if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
continue;
@@ -160,7 +160,7 @@ void cmci_clear(void)
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
__clear_bit(i, __get_cpu_var(mce_banks_owned));
}
- spin_unlock_irqrestore(&cmci_discover_lock, flags);
+ raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
/*
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index f1a6244d7d93..794bc95134cd 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -75,8 +75,10 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
/*
* Undefined/reserved opcodes, conditional jump, Opcode Extension
* Groups, and some special opcodes can not boost.
+ * This is non-const to keep gcc from statically optimizing it out, as
+ * variable_test_bit makes gcc think only *(unsigned long*) is used.
*/
-static const u32 twobyte_is_boostable[256 / 32] = {
+static u32 twobyte_is_boostable[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ---------------------------------------------- */
W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index b49d00da2aed..622872054fbe 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -117,8 +117,8 @@ again:
}
/*
- * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
- * documentation.
+ * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel
+ * parameter documentation.
*/
static __init int iommu_setup(char *p)
{
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 7a3b65107a27..2196c703c5e2 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -262,7 +262,7 @@ EXPORT_SYMBOL_GPL(start_thread);
/*
- * switch_to(x,yn) should switch tasks from x to y.
+ * switch_to(x,y) should switch tasks from x to y.
*
* We fsave/fwait so that an exception goes off at the right time
* (as a call from the fsave or fwait in effect) rather than to
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 3f2ad2640d85..ccdbc16b8941 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -42,8 +42,11 @@ int mach_set_rtc_mmss(unsigned long nowtime)
{
int real_seconds, real_minutes, cmos_minutes;
unsigned char save_control, save_freq_select;
+ unsigned long flags;
int retval = 0;
+ spin_lock_irqsave(&rtc_lock, flags);
+
/* tell the clock it's being set */
save_control = CMOS_READ(RTC_CONTROL);
CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
@@ -93,12 +96,17 @@ int mach_set_rtc_mmss(unsigned long nowtime)
CMOS_WRITE(save_control, RTC_CONTROL);
CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
return retval;
}
unsigned long mach_get_cmos_time(void)
{
unsigned int status, year, mon, day, hour, min, sec, century = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtc_lock, flags);
/*
* If UIP is clear, then we have >= 244 microseconds before
@@ -125,6 +133,8 @@ unsigned long mach_get_cmos_time(void)
status = CMOS_READ(RTC_CONTROL);
WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY));
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) {
sec = bcd2bin(sec);
min = bcd2bin(min);
@@ -169,24 +179,15 @@ EXPORT_SYMBOL(rtc_cmos_write);
int update_persistent_clock(struct timespec now)
{
- unsigned long flags;
- int retval;
-
- spin_lock_irqsave(&rtc_lock, flags);
- retval = x86_platform.set_wallclock(now.tv_sec);
- spin_unlock_irqrestore(&rtc_lock, flags);
-
- return retval;
+ return x86_platform.set_wallclock(now.tv_sec);
}
/* not static: needed by APM */
void read_persistent_clock(struct timespec *ts)
{
- unsigned long retval, flags;
+ unsigned long retval;
- spin_lock_irqsave(&rtc_lock, flags);
retval = x86_platform.get_wallclock();
- spin_unlock_irqrestore(&rtc_lock, flags);
ts->tv_sec = retval;
ts->tv_nsec = 0;
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 18ae83dd1cd7..b56c65de384d 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -56,7 +56,7 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
};
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
+static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE;
static int __init vsyscall_setup(char *str)
{
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 6f08bc940fa8..8b4cc5f067de 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3603,7 +3603,7 @@ done_prefixes:
break;
case Src2CL:
ctxt->src2.bytes = 1;
- ctxt->src2.val = ctxt->regs[VCPU_REGS_RCX] & 0x8;
+ ctxt->src2.val = ctxt->regs[VCPU_REGS_RCX] & 0xff;
break;
case Src2ImmByte:
rc = decode_imm(ctxt, &ctxt->src2, 1, true);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 1c5b69373a00..8e8da7960dbe 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -400,7 +400,8 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
/* xchg acts as a barrier before the setting of the high bits */
orig.spte_low = xchg(&ssptep->spte_low, sspte.spte_low);
- orig.spte_high = ssptep->spte_high = sspte.spte_high;
+ orig.spte_high = ssptep->spte_high;
+ ssptep->spte_high = sspte.spte_high;
count_spte_clear(sptep, spte);
return orig.spte;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 30326443ab81..87488b93a65c 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -63,9 +63,8 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
#ifdef CONFIG_X86_32
/* for fixmap */
tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
-
- good_end = max_pfn_mapped << PAGE_SHIFT;
#endif
+ good_end = max_pfn_mapped << PAGE_SHIFT;
base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
if (base == MEMBLOCK_ERROR)
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 67421f38a215..de54b9b278a7 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -29,7 +29,6 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/io.h>
-#include <linux/version.h>
#include <linux/kallsyms.h>
#include <asm/pgtable.h>
#include <linux/mmiotrace.h>
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 68894fdc034b..96646b3aeca8 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -355,10 +355,10 @@ static void nmi_cpu_setup(void *dummy)
int cpu = smp_processor_id();
struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
nmi_cpu_save_registers(msrs);
- spin_lock(&oprofilefs_lock);
+ raw_spin_lock(&oprofilefs_lock);
model->setup_ctrs(model, msrs);
nmi_cpu_setup_mux(cpu, msrs);
- spin_unlock(&oprofilefs_lock);
+ raw_spin_unlock(&oprofilefs_lock);
per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 039d91315bc5..404f21a3ff9e 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -43,6 +43,17 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = {
DMI_MATCH(DMI_PRODUCT_NAME, "ALiveSATA2-GLAN"),
},
},
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=30552 */
+ /* 2006 AMD HT/VIA system with two host bridges */
+ {
+ .callback = set_use_crs,
+ .ident = "ASUS M2V-MX SE",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "M2V-MX SE"),
+ DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
+ },
+ },
{}
};
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 1017c7bee388..492ade8c978e 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -175,8 +175,10 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
"pcifront-msi-x" :
"pcifront-msi",
DOMID_SELF);
- if (irq < 0)
+ if (irq < 0) {
+ ret = irq;
goto free;
+ }
i++;
}
kfree(v);
@@ -221,8 +223,10 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
if (msg.data != XEN_PIRQ_MSI_DATA ||
xen_irq_from_pirq(pirq) < 0) {
pirq = xen_allocate_pirq_msi(dev, msidesc);
- if (pirq < 0)
+ if (pirq < 0) {
+ irq = -ENODEV;
goto error;
+ }
xen_msi_compose_msg(dev, pirq, &msg);
__write_msi_msg(msidesc, &msg);
dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
@@ -244,10 +248,12 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
error:
dev_err(&dev->dev,
"Xen PCI frontend has not registered MSI/MSI-X support!\n");
- return -ENODEV;
+ return irq;
}
#ifdef CONFIG_XEN_DOM0
+static bool __read_mostly pci_seg_supported = true;
+
static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
int ret = 0;
@@ -265,10 +271,11 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
memset(&map_irq, 0, sizeof(map_irq));
map_irq.domid = domid;
- map_irq.type = MAP_PIRQ_TYPE_MSI;
+ map_irq.type = MAP_PIRQ_TYPE_MSI_SEG;
map_irq.index = -1;
map_irq.pirq = -1;
- map_irq.bus = dev->bus->number;
+ map_irq.bus = dev->bus->number |
+ (pci_domain_nr(dev->bus) << 16);
map_irq.devfn = dev->devfn;
if (type == PCI_CAP_ID_MSIX) {
@@ -285,7 +292,20 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
}
- ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
+ ret = -EINVAL;
+ if (pci_seg_supported)
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
+ &map_irq);
+ if (ret == -EINVAL && !pci_domain_nr(dev->bus)) {
+ map_irq.type = MAP_PIRQ_TYPE_MSI;
+ map_irq.index = -1;
+ map_irq.pirq = -1;
+ map_irq.bus = dev->bus->number;
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
+ &map_irq);
+ if (ret != -EINVAL)
+ pci_seg_supported = false;
+ }
if (ret) {
dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
ret, domid);
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 58425adc22c6..e6379526675b 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -14,6 +14,8 @@
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
#include <linux/sfi.h>
#include <linux/intel_pmic_gpio.h>
#include <linux/spi/spi.h>
@@ -392,6 +394,7 @@ static void __init *max3111_platform_data(void *info)
struct spi_board_info *spi_info = info;
int intr = get_gpio_by_name("max3111_int");
+ spi_info->mode = SPI_MODE_0;
if (intr == -1)
return NULL;
spi_info->irq = intr + MRST_IRQ_OFFSET;
@@ -678,38 +681,40 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
pentry = (struct sfi_device_table_entry *)sb->pentry;
for (i = 0; i < num; i++, pentry++) {
- if (pentry->irq != (u8)0xff) { /* native RTE case */
+ int irq = pentry->irq;
+
+ if (irq != (u8)0xff) { /* native RTE case */
/* these SPI2 devices are not exposed to system as PCI
* devices, but they have separate RTE entry in IOAPIC
* so we have to enable them one by one here
*/
- ioapic = mp_find_ioapic(pentry->irq);
+ ioapic = mp_find_ioapic(irq);
irq_attr.ioapic = ioapic;
- irq_attr.ioapic_pin = pentry->irq;
+ irq_attr.ioapic_pin = irq;
irq_attr.trigger = 1;
irq_attr.polarity = 1;
- io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr);
+ io_apic_set_pci_routing(NULL, irq, &irq_attr);
} else
- pentry->irq = 0; /* No irq */
+ irq = 0; /* No irq */
switch (pentry->type) {
case SFI_DEV_TYPE_IPC:
/* ID as IRQ is a hack that will go away */
- pdev = platform_device_alloc(pentry->name, pentry->irq);
+ pdev = platform_device_alloc(pentry->name, irq);
if (pdev == NULL) {
pr_err("out of memory for SFI platform device '%s'.\n",
pentry->name);
continue;
}
- install_irq_resource(pdev, pentry->irq);
+ install_irq_resource(pdev, irq);
pr_debug("info[%2d]: IPC bus, name = %16.16s, "
- "irq = 0x%2x\n", i, pentry->name, pentry->irq);
+ "irq = 0x%2x\n", i, pentry->name, irq);
sfi_handle_ipc_dev(pdev);
break;
case SFI_DEV_TYPE_SPI:
memset(&spi_info, 0, sizeof(spi_info));
strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN);
- spi_info.irq = pentry->irq;
+ spi_info.irq = irq;
spi_info.bus_num = pentry->host_num;
spi_info.chip_select = pentry->addr;
spi_info.max_speed_hz = pentry->max_freq;
@@ -726,7 +731,7 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
memset(&i2c_info, 0, sizeof(i2c_info));
bus = pentry->host_num;
strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN);
- i2c_info.irq = pentry->irq;
+ i2c_info.irq = irq;
i2c_info.addr = pentry->addr;
pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, "
"irq = 0x%2x, addr = 0x%x\n", i, bus,
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
index 73d70d65e76e..6d5dbcdd444a 100644
--- a/arch/x86/platform/mrst/vrtc.c
+++ b/arch/x86/platform/mrst/vrtc.c
@@ -58,8 +58,11 @@ EXPORT_SYMBOL_GPL(vrtc_cmos_write);
unsigned long vrtc_get_time(void)
{
u8 sec, min, hour, mday, mon;
+ unsigned long flags;
u32 year;
+ spin_lock_irqsave(&rtc_lock, flags);
+
while ((vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP))
cpu_relax();
@@ -70,6 +73,8 @@ unsigned long vrtc_get_time(void)
mon = vrtc_cmos_read(RTC_MONTH);
year = vrtc_cmos_read(RTC_YEAR);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
/* vRTC YEAR reg contains the offset to 1960 */
year += 1960;
@@ -83,8 +88,10 @@ unsigned long vrtc_get_time(void)
int vrtc_set_mmss(unsigned long nowtime)
{
int real_sec, real_min;
+ unsigned long flags;
int vrtc_min;
+ spin_lock_irqsave(&rtc_lock, flags);
vrtc_min = vrtc_cmos_read(RTC_MINUTES);
real_sec = nowtime % 60;
@@ -95,6 +102,8 @@ int vrtc_set_mmss(unsigned long nowtime)
vrtc_cmos_write(real_sec, RTC_SECONDS);
vrtc_cmos_write(real_min, RTC_MINUTES);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+
return 0;
}
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 5cc821cb2e09..26c731a106af 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -25,8 +25,7 @@ config XEN_PRIVILEGED_GUEST
config XEN_PVHVM
def_bool y
- depends on XEN
- depends on X86_LOCAL_APIC
+ depends on XEN && PCI && X86_LOCAL_APIC
config XEN_MAX_DOMAIN_MEMORY
int
@@ -49,11 +48,3 @@ config XEN_DEBUG_FS
help
Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead.
-
-config XEN_DEBUG
- bool "Enable Xen debug checks"
- depends on XEN
- default n
- help
- Enable various WARN_ON checks in the Xen MMU code.
- Enabling this option WILL incur a significant performance overhead.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2d69617950f7..da8afd576a6b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -251,6 +251,7 @@ static void __init xen_init_cpuid_mask(void)
~((1 << X86_FEATURE_APIC) | /* disable local APIC */
(1 << X86_FEATURE_ACPI)); /* disable ACPI */
ax = 1;
+ cx = 0;
xen_cpuid(&ax, &bx, &cx, &dx);
xsave_mask =
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 20a614275064..87f6673b1207 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -495,41 +495,6 @@ static pte_t xen_make_pte(pteval_t pte)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
-#ifdef CONFIG_XEN_DEBUG
-pte_t xen_make_pte_debug(pteval_t pte)
-{
- phys_addr_t addr = (pte & PTE_PFN_MASK);
- phys_addr_t other_addr;
- bool io_page = false;
- pte_t _pte;
-
- if (pte & _PAGE_IOMAP)
- io_page = true;
-
- _pte = xen_make_pte(pte);
-
- if (!addr)
- return _pte;
-
- if (io_page &&
- (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
- other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
- WARN_ONCE(addr != other_addr,
- "0x%lx is using VM_IO, but it is 0x%lx!\n",
- (unsigned long)addr, (unsigned long)other_addr);
- } else {
- pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
- other_addr = (_pte.pte & PTE_PFN_MASK);
- WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set),
- "0x%lx is missing VM_IO (and wasn't fixed)!\n",
- (unsigned long)addr);
- }
-
- return _pte;
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
-#endif
-
static pgd_t xen_make_pgd(pgdval_t pgd)
{
pgd = pte_pfn_to_mfn(pgd);
@@ -1721,10 +1686,8 @@ void __init xen_setup_machphys_mapping(void)
machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
}
#ifdef CONFIG_X86_32
- if ((machine_to_phys_mapping + machine_to_phys_nr)
- < machine_to_phys_mapping)
- machine_to_phys_nr = (unsigned long *)NULL
- - machine_to_phys_mapping;
+ WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1))
+ < machine_to_phys_mapping);
#endif
}
@@ -1994,9 +1957,6 @@ void __init xen_ident_map_ISA(void)
static void __init xen_post_allocator_init(void)
{
-#ifdef CONFIG_XEN_DEBUG
- pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
-#endif
pv_mmu_ops.set_pte = xen_set_pte;
pv_mmu_ops.set_pmd = xen_set_pmd;
pv_mmu_ops.set_pud = xen_set_pud;
@@ -2406,17 +2366,3 @@ out:
return err;
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
-
-#ifdef CONFIG_XEN_DEBUG_FS
-static int p2m_dump_open(struct inode *inode, struct file *filp)
-{
- return single_open(filp, p2m_dump_show, NULL);
-}
-
-static const struct file_operations p2m_dump_fops = {
- .open = p2m_dump_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-#endif /* CONFIG_XEN_DEBUG_FS */
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 58efeb9d5440..1b267e75158d 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -161,7 +161,9 @@
#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
+#include <xen/grant_table.h>
+#include "multicalls.h"
#include "xen-ops.h"
static void __init m2p_override_init(void);
@@ -676,7 +678,8 @@ static unsigned long mfn_hash(unsigned long mfn)
}
/* Add an MFN override for a particular page */
-int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
+int m2p_add_override(unsigned long mfn, struct page *page,
+ struct gnttab_map_grant_ref *kmap_op)
{
unsigned long flags;
unsigned long pfn;
@@ -692,16 +695,28 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
"m2p_add_override: pfn %lx not mapped", pfn))
return -EINVAL;
}
-
- page->private = mfn;
+ WARN_ON(PagePrivate(page));
+ SetPagePrivate(page);
+ set_page_private(page, mfn);
page->index = pfn_to_mfn(pfn);
if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
return -ENOMEM;
- if (clear_pte && !PageHighMem(page))
- /* Just zap old mapping for now */
- pte_clear(&init_mm, address, ptep);
+ if (kmap_op != NULL) {
+ if (!PageHighMem(page)) {
+ struct multicall_space mcs =
+ xen_mc_entry(sizeof(*kmap_op));
+
+ MULTI_grant_table_op(mcs.mc,
+ GNTTABOP_map_grant_ref, kmap_op, 1);
+
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
+ }
+ /* let's use dev_bus_addr to record the old mfn instead */
+ kmap_op->dev_bus_addr = page->index;
+ page->index = (unsigned long) kmap_op;
+ }
spin_lock_irqsave(&m2p_override_lock, flags);
list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
spin_unlock_irqrestore(&m2p_override_lock, flags);
@@ -735,13 +750,56 @@ int m2p_remove_override(struct page *page, bool clear_pte)
spin_lock_irqsave(&m2p_override_lock, flags);
list_del(&page->lru);
spin_unlock_irqrestore(&m2p_override_lock, flags);
- set_phys_to_machine(pfn, page->index);
+ WARN_ON(!PagePrivate(page));
+ ClearPagePrivate(page);
- if (clear_pte && !PageHighMem(page))
- set_pte_at(&init_mm, address, ptep,
- pfn_pte(pfn, PAGE_KERNEL));
- /* No tlb flush necessary because the caller already
- * left the pte unmapped. */
+ if (clear_pte) {
+ struct gnttab_map_grant_ref *map_op =
+ (struct gnttab_map_grant_ref *) page->index;
+ set_phys_to_machine(pfn, map_op->dev_bus_addr);
+ if (!PageHighMem(page)) {
+ struct multicall_space mcs;
+ struct gnttab_unmap_grant_ref *unmap_op;
+
+ /*
+ * It might be that we queued all the m2p grant table
+ * hypercalls in a multicall, then m2p_remove_override
+ * get called before the multicall has actually been
+ * issued. In this case handle is going to -1 because
+ * it hasn't been modified yet.
+ */
+ if (map_op->handle == -1)
+ xen_mc_flush();
+ /*
+ * Now if map_op->handle is negative it means that the
+ * hypercall actually returned an error.
+ */
+ if (map_op->handle == GNTST_general_error) {
+ printk(KERN_WARNING "m2p_remove_override: "
+ "pfn %lx mfn %lx, failed to modify kernel mappings",
+ pfn, mfn);
+ return -1;
+ }
+
+ mcs = xen_mc_entry(
+ sizeof(struct gnttab_unmap_grant_ref));
+ unmap_op = mcs.args;
+ unmap_op->host_addr = map_op->host_addr;
+ unmap_op->handle = map_op->handle;
+ unmap_op->dev_bus_addr = 0;
+
+ MULTI_grant_table_op(mcs.mc,
+ GNTTABOP_unmap_grant_ref, unmap_op, 1);
+
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+ set_pte_at(&init_mm, address, ptep,
+ pfn_pte(pfn, PAGE_KERNEL));
+ __flush_tlb_single(address);
+ map_op->host_addr = 0;
+ }
+ } else
+ set_phys_to_machine(pfn, page->index);
return 0;
}
@@ -758,7 +816,7 @@ struct page *m2p_find_override(unsigned long mfn)
spin_lock_irqsave(&m2p_override_lock, flags);
list_for_each_entry(p, bucket, lru) {
- if (p->private == mfn) {
+ if (page_private(p) == mfn) {
ret = p;
break;
}
@@ -782,17 +840,21 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
#ifdef CONFIG_XEN_DEBUG_FS
-
-int p2m_dump_show(struct seq_file *m, void *v)
+#include <linux/debugfs.h>
+#include "debugfs.h"
+static int p2m_dump_show(struct seq_file *m, void *v)
{
static const char * const level_name[] = { "top", "middle",
- "entry", "abnormal" };
- static const char * const type_name[] = { "identity", "missing",
- "pfn", "abnormal"};
+ "entry", "abnormal", "error"};
#define TYPE_IDENTITY 0
#define TYPE_MISSING 1
#define TYPE_PFN 2
#define TYPE_UNKNOWN 3
+ static const char * const type_name[] = {
+ [TYPE_IDENTITY] = "identity",
+ [TYPE_MISSING] = "missing",
+ [TYPE_PFN] = "pfn",
+ [TYPE_UNKNOWN] = "abnormal"};
unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
unsigned int uninitialized_var(prev_level);
unsigned int uninitialized_var(prev_type);
@@ -856,4 +918,32 @@ int p2m_dump_show(struct seq_file *m, void *v)
#undef TYPE_PFN
#undef TYPE_UNKNOWN
}
-#endif
+
+static int p2m_dump_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, p2m_dump_show, NULL);
+}
+
+static const struct file_operations p2m_dump_fops = {
+ .open = p2m_dump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static struct dentry *d_mmu_debug;
+
+static int __init xen_p2m_debugfs(void)
+{
+ struct dentry *d_xen = xen_init_debugfs();
+
+ if (d_xen == NULL)
+ return -ENOMEM;
+
+ d_mmu_debug = debugfs_create_dir("mmu", d_xen);
+
+ debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
+ return 0;
+}
+fs_initcall(xen_p2m_debugfs);
+#endif /* CONFIG_XEN_DEBUG_FS */
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index c3b8d440873c..38d0af4fefec 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -37,7 +37,10 @@ extern void xen_syscall_target(void);
extern void xen_syscall32_target(void);
/* Amount of extra memory space we add to the e820 ranges */
-phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
+struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
+
+/* Number of pages released from the initial allocation. */
+unsigned long xen_released_pages;
/*
* The maximum amount of extra memory compared to the base size. The
@@ -51,48 +54,47 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
*/
#define EXTRA_MEM_RATIO (10)
-static void __init xen_add_extra_mem(unsigned long pages)
+static void __init xen_add_extra_mem(u64 start, u64 size)
{
unsigned long pfn;
+ int i;
- u64 size = (u64)pages * PAGE_SIZE;
- u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
-
- if (!pages)
- return;
-
- e820_add_region(extra_start, size, E820_RAM);
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-
- memblock_x86_reserve_range(extra_start, extra_start + size, "XEN EXTRA");
+ for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+ /* Add new region. */
+ if (xen_extra_mem[i].size == 0) {
+ xen_extra_mem[i].start = start;
+ xen_extra_mem[i].size = size;
+ break;
+ }
+ /* Append to existing region. */
+ if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
+ xen_extra_mem[i].size += size;
+ break;
+ }
+ }
+ if (i == XEN_EXTRA_MEM_MAX_REGIONS)
+ printk(KERN_WARNING "Warning: not enough extra memory regions\n");
- xen_extra_mem_size += size;
+ memblock_x86_reserve_range(start, start + size, "XEN EXTRA");
- xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
+ xen_max_p2m_pfn = PFN_DOWN(start + size);
- for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
+ for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++)
__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
-static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
- phys_addr_t end_addr)
+static unsigned long __init xen_release_chunk(unsigned long start,
+ unsigned long end)
{
struct xen_memory_reservation reservation = {
.address_bits = 0,
.extent_order = 0,
.domid = DOMID_SELF
};
- unsigned long start, end;
unsigned long len = 0;
unsigned long pfn;
int ret;
- start = PFN_UP(start_addr);
- end = PFN_DOWN(end_addr);
-
- if (end <= start)
- return 0;
-
for(pfn = start; pfn < end; pfn++) {
unsigned long mfn = pfn_to_mfn(pfn);
@@ -117,72 +119,52 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
return len;
}
-static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
- const struct e820map *e820)
+static unsigned long __init xen_set_identity_and_release(
+ const struct e820entry *list, size_t map_size, unsigned long nr_pages)
{
- phys_addr_t max_addr = PFN_PHYS(max_pfn);
- phys_addr_t last_end = ISA_END_ADDRESS;
+ phys_addr_t start = 0;
unsigned long released = 0;
- int i;
-
- /* Free any unused memory above the low 1Mbyte. */
- for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
- phys_addr_t end = e820->map[i].addr;
- end = min(max_addr, end);
-
- if (last_end < end)
- released += xen_release_chunk(last_end, end);
- last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
- }
-
- if (last_end < max_addr)
- released += xen_release_chunk(last_end, max_addr);
-
- printk(KERN_INFO "released %lu pages of unused memory\n", released);
- return released;
-}
-
-static unsigned long __init xen_set_identity(const struct e820entry *list,
- ssize_t map_size)
-{
- phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
- phys_addr_t start_pci = last;
- const struct e820entry *entry;
unsigned long identity = 0;
+ const struct e820entry *entry;
int i;
+ /*
+ * Combine non-RAM regions and gaps until a RAM region (or the
+ * end of the map) is reached, then set the 1:1 map and
+ * release the pages (if available) in those non-RAM regions.
+ *
+ * The combined non-RAM regions are rounded to a whole number
+ * of pages so any partial pages are accessible via the 1:1
+ * mapping. This is needed for some BIOSes that put (for
+ * example) the DMI tables in a reserved region that begins on
+ * a non-page boundary.
+ */
for (i = 0, entry = list; i < map_size; i++, entry++) {
- phys_addr_t start = entry->addr;
- phys_addr_t end = start + entry->size;
+ phys_addr_t end = entry->addr + entry->size;
- if (start < last)
- start = last;
+ if (entry->type == E820_RAM || i == map_size - 1) {
+ unsigned long start_pfn = PFN_DOWN(start);
+ unsigned long end_pfn = PFN_UP(end);
- if (end <= start)
- continue;
+ if (entry->type == E820_RAM)
+ end_pfn = PFN_UP(entry->addr);
- /* Skip over the 1MB region. */
- if (last > end)
- continue;
+ if (start_pfn < end_pfn) {
+ if (start_pfn < nr_pages)
+ released += xen_release_chunk(
+ start_pfn, min(end_pfn, nr_pages));
- if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) {
- if (start > start_pci)
identity += set_phys_range_identity(
- PFN_UP(start_pci), PFN_DOWN(start));
-
- /* Without saving 'last' we would gooble RAM too
- * at the end of the loop. */
- last = end;
- start_pci = end;
- continue;
+ start_pfn, end_pfn);
+ }
+ start = end;
}
- start_pci = min(start, start_pci);
- last = end;
}
- if (last > start_pci)
- identity += set_phys_range_identity(
- PFN_UP(start_pci), PFN_DOWN(last));
- return identity;
+
+ printk(KERN_INFO "Released %lu pages of unused memory\n", released);
+ printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
+
+ return released;
}
static unsigned long __init xen_get_max_pages(void)
@@ -197,21 +179,32 @@ static unsigned long __init xen_get_max_pages(void)
return min(max_pages, MAX_DOMAIN_PAGES);
}
+static void xen_align_and_add_e820_region(u64 start, u64 size, int type)
+{
+ u64 end = start + size;
+
+ /* Align RAM regions to page boundaries. */
+ if (type == E820_RAM) {
+ start = PAGE_ALIGN(start);
+ end &= ~((u64)PAGE_SIZE - 1);
+ }
+
+ e820_add_region(start, end - start, type);
+}
+
/**
* machine_specific_memory_setup - Hook for machine specific memory setup.
**/
char * __init xen_memory_setup(void)
{
static struct e820entry map[E820MAX] __initdata;
- static struct e820entry map_raw[E820MAX] __initdata;
unsigned long max_pfn = xen_start_info->nr_pages;
unsigned long long mem_end;
int rc;
struct xen_memory_map memmap;
+ unsigned long max_pages;
unsigned long extra_pages = 0;
- unsigned long extra_limit;
- unsigned long identity_pages = 0;
int i;
int op;
@@ -237,58 +230,65 @@ char * __init xen_memory_setup(void)
}
BUG_ON(rc);
- memcpy(map_raw, map, sizeof(map));
- e820.nr_map = 0;
- xen_extra_mem_start = mem_end;
- for (i = 0; i < memmap.nr_entries; i++) {
- unsigned long long end;
-
- /* Guard against non-page aligned E820 entries. */
- if (map[i].type == E820_RAM)
- map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;
-
- end = map[i].addr + map[i].size;
- if (map[i].type == E820_RAM && end > mem_end) {
- /* RAM off the end - may be partially included */
- u64 delta = min(map[i].size, end - mem_end);
-
- map[i].size -= delta;
- end -= delta;
-
- extra_pages += PFN_DOWN(delta);
- /*
- * Set RAM below 4GB that is not for us to be unusable.
- * This prevents "System RAM" address space from being
- * used as potential resource for I/O address (happens
- * when 'allocate_resource' is called).
- */
- if (delta &&
- (xen_initial_domain() && end < 0x100000000ULL))
- e820_add_region(end, delta, E820_UNUSABLE);
+ /* Make sure the Xen-supplied memory map is well-ordered. */
+ sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
+
+ max_pages = xen_get_max_pages();
+ if (max_pages > max_pfn)
+ extra_pages += max_pages - max_pfn;
+
+ /*
+ * Set P2M for all non-RAM pages and E820 gaps to be identity
+ * type PFNs. Any RAM pages that would be made inaccesible by
+ * this are first released.
+ */
+ xen_released_pages = xen_set_identity_and_release(
+ map, memmap.nr_entries, max_pfn);
+ extra_pages += xen_released_pages;
+
+ /*
+ * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
+ * factor the base size. On non-highmem systems, the base
+ * size is the full initial memory allocation; on highmem it
+ * is limited to the max size of lowmem, so that it doesn't
+ * get completely filled.
+ *
+ * In principle there could be a problem in lowmem systems if
+ * the initial memory is also very large with respect to
+ * lowmem, but we won't try to deal with that here.
+ */
+ extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
+ extra_pages);
+
+ i = 0;
+ while (i < memmap.nr_entries) {
+ u64 addr = map[i].addr;
+ u64 size = map[i].size;
+ u32 type = map[i].type;
+
+ if (type == E820_RAM) {
+ if (addr < mem_end) {
+ size = min(size, mem_end - addr);
+ } else if (extra_pages) {
+ size = min(size, (u64)extra_pages * PAGE_SIZE);
+ extra_pages -= size / PAGE_SIZE;
+ xen_add_extra_mem(addr, size);
+ } else
+ type = E820_UNUSABLE;
}
- if (map[i].size > 0 && end > xen_extra_mem_start)
- xen_extra_mem_start = end;
+ xen_align_and_add_e820_region(addr, size, type);
- /* Add region if any remains */
- if (map[i].size > 0)
- e820_add_region(map[i].addr, map[i].size, map[i].type);
+ map[i].addr += size;
+ map[i].size -= size;
+ if (map[i].size == 0)
+ i++;
}
- /* Align the balloon area so that max_low_pfn does not get set
- * to be at the _end_ of the PCI gap at the far end (fee01000).
- * Note that xen_extra_mem_start gets set in the loop above to be
- * past the last E820 region. */
- if (xen_initial_domain() && (xen_extra_mem_start < (1ULL<<32)))
- xen_extra_mem_start = (1ULL<<32);
/*
* In domU, the ISA region is normal, usable memory, but we
* reserve ISA memory anyway because too many things poke
* about in there.
- *
- * In Dom0, the host E820 information can leave gaps in the
- * ISA range, which would cause us to release those pages. To
- * avoid this, we unconditionally reserve them here.
*/
e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
E820_RESERVED);
@@ -305,42 +305,6 @@ char * __init xen_memory_setup(void)
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
- extra_limit = xen_get_max_pages();
- if (extra_limit >= max_pfn)
- extra_pages = extra_limit - max_pfn;
- else
- extra_pages = 0;
-
- extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
-
- /*
- * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
- * factor the base size. On non-highmem systems, the base
- * size is the full initial memory allocation; on highmem it
- * is limited to the max size of lowmem, so that it doesn't
- * get completely filled.
- *
- * In principle there could be a problem in lowmem systems if
- * the initial memory is also very large with respect to
- * lowmem, but we won't try to deal with that here.
- */
- extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
- max_pfn + extra_pages);
-
- if (extra_limit >= max_pfn)
- extra_pages = extra_limit - max_pfn;
- else
- extra_pages = 0;
-
- xen_add_extra_mem(extra_pages);
-
- /*
- * Set P2M for all non-RAM pages and E820 gaps to be identity
- * type PFNs. We supply it with the non-sanitized version
- * of the E820.
- */
- identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
- printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
return "Xen";
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d4fc6d454f8d..041d4fe9dfe4 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -532,7 +532,6 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
WARN_ON(xen_smp_intr_init(0));
xen_init_lock_cpu(0);
- xen_init_spinlocks();
}
static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 5158c505bef9..163b4679556e 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -168,9 +168,10 @@ cycle_t xen_clocksource_read(void)
struct pvclock_vcpu_time_info *src;
cycle_t ret;
- src = &get_cpu_var(xen_vcpu)->time;
+ preempt_disable_notrace();
+ src = &__get_cpu_var(xen_vcpu)->time;
ret = pvclock_clocksource_read(src);
- put_cpu_var(xen_vcpu);
+ preempt_enable_notrace();
return ret;
}