summaryrefslogtreecommitdiff
path: root/arch/i386
diff options
context:
space:
mode:
Diffstat (limited to 'arch/i386')
-rw-r--r--arch/i386/Kconfig4
-rw-r--r--arch/i386/kernel/apm.c26
-rw-r--r--arch/i386/kernel/cpu/mtrr/generic.c4
-rw-r--r--arch/i386/kernel/efi_stub.S1
-rw-r--r--arch/i386/kernel/reboot.c12
-rw-r--r--arch/i386/kernel/setup.c23
-rw-r--r--arch/i386/kernel/smp.c66
-rw-r--r--arch/i386/kernel/smpboot.c6
-rw-r--r--arch/i386/kernel/srat.c5
-rw-r--r--arch/i386/kernel/time.c50
-rw-r--r--arch/i386/kernel/time_hpet.c37
-rw-r--r--arch/i386/kernel/traps.c11
-rw-r--r--arch/i386/kernel/vmlinux.lds.S12
-rw-r--r--arch/i386/mach-voyager/voyager_thread.c1
-rw-r--r--arch/i386/mm/boot_ioremap.c7
-rw-r--r--arch/i386/mm/discontig.c33
-rw-r--r--arch/i386/mm/init.c44
-rw-r--r--arch/i386/mm/pgtable.c30
-rw-r--r--arch/i386/power/swsusp.S2
19 files changed, 298 insertions, 76 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index b2751eadbc56..6189b0c28d6f 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -494,7 +494,7 @@ config HIGHMEM64G
endchoice
choice
- depends on EXPERIMENTAL && !X86_PAE
+ depends on EXPERIMENTAL
prompt "Memory split" if EMBEDDED
default VMSPLIT_3G
help
@@ -516,6 +516,7 @@ choice
config VMSPLIT_3G
bool "3G/1G user/kernel split"
config VMSPLIT_3G_OPT
+ depends on !HIGHMEM
bool "3G/1G user/kernel split (for full 1G low memory)"
config VMSPLIT_2G
bool "2G/2G user/kernel split"
@@ -794,6 +795,7 @@ config HOTPLUG_CPU
config COMPAT_VDSO
bool "Compat VDSO support"
default y
+ depends on !PARAVIRT
help
Map the VDSO to the predictable old-style address too.
---help---
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 8591f2fa920c..ff9ce4b5eaa8 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -1154,9 +1154,11 @@ out:
static void set_time(void)
{
+ struct timespec ts;
if (got_clock_diff) { /* Must know time zone in order to set clock */
- xtime.tv_sec = get_cmos_time() + clock_cmos_diff;
- xtime.tv_nsec = 0;
+ ts.tv_sec = get_cmos_time() + clock_cmos_diff;
+ ts.tv_nsec = 0;
+ do_settimeofday(&ts);
}
}
@@ -1232,13 +1234,8 @@ static int suspend(int vetoable)
restore_processor_state();
local_irq_disable();
- write_seqlock(&xtime_lock);
- spin_lock(&i8253_lock);
- reinit_timer();
set_time();
-
- spin_unlock(&i8253_lock);
- write_sequnlock(&xtime_lock);
+ reinit_timer();
if (err == APM_NO_ERROR)
err = APM_SUCCESS;
@@ -1365,9 +1362,7 @@ static void check_events(void)
ignore_bounce = 1;
if ((event != APM_NORMAL_RESUME)
|| (ignore_normal_resume == 0)) {
- write_seqlock_irq(&xtime_lock);
set_time();
- write_sequnlock_irq(&xtime_lock);
device_resume();
pm_send_all(PM_RESUME, (void *)0);
queue_event(event, NULL);
@@ -1383,9 +1378,7 @@ static void check_events(void)
break;
case APM_UPDATE_TIME:
- write_seqlock_irq(&xtime_lock);
set_time();
- write_sequnlock_irq(&xtime_lock);
break;
case APM_CRITICAL_SUSPEND:
@@ -2339,6 +2332,7 @@ static int __init apm_init(void)
ret = kernel_thread(apm, NULL, CLONE_KERNEL | SIGCHLD);
if (ret < 0) {
printk(KERN_ERR "apm: disabled - Unable to start kernel thread.\n");
+ remove_proc_entry("apm", NULL);
return -ENOMEM;
}
@@ -2348,7 +2342,13 @@ static int __init apm_init(void)
return 0;
}
- misc_register(&apm_device);
+ /*
+ * Note we don't actually care if the misc_device cannot be registered.
+ * this driver can do its job without it, even if userspace can't
+ * control it. just log the error
+ */
+ if (misc_register(&apm_device))
+ printk(KERN_WARNING "apm: Could not register misc device.\n");
if (HZ != 100)
idle_period = (idle_period * HZ) / 100;
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index 169ac8e0db68..0b61eed8bbd8 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -243,7 +243,7 @@ static DEFINE_SPINLOCK(set_atomicity_lock);
* has been called.
*/
-static void prepare_set(void)
+static void prepare_set(void) __acquires(set_atomicity_lock)
{
unsigned long cr0;
@@ -274,7 +274,7 @@ static void prepare_set(void)
mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & 0xf300UL, deftype_hi);
}
-static void post_set(void)
+static void post_set(void) __releases(set_atomicity_lock)
{
/* Flush TLBs (no need to flush caches - they are disabled) */
__flush_tlb();
diff --git a/arch/i386/kernel/efi_stub.S b/arch/i386/kernel/efi_stub.S
index d3ee73a3eee3..ef00bb77d7e4 100644
--- a/arch/i386/kernel/efi_stub.S
+++ b/arch/i386/kernel/efi_stub.S
@@ -7,7 +7,6 @@
#include <linux/linkage.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
/*
* efi_call_phys(void *, ...) is a function with variable parameters.
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
index 54cfeabbc5e4..84278e0093a2 100644
--- a/arch/i386/kernel/reboot.c
+++ b/arch/i386/kernel/reboot.c
@@ -145,14 +145,10 @@ real_mode_gdt_entries [3] =
0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */
};
-static struct
-{
- unsigned short size __attribute__ ((packed));
- unsigned long long * base __attribute__ ((packed));
-}
-real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries },
-real_mode_idt = { 0x3ff, NULL },
-no_idt = { 0, NULL };
+static struct Xgt_desc_struct
+real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries },
+real_mode_idt = { 0x3ff, 0 },
+no_idt = { 0, 0 };
/* This is 16-bit protected mode code to disable paging and the cache,
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index f1682206d304..16d99444cf66 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -53,6 +53,7 @@
#include <asm/apic.h>
#include <asm/e820.h>
#include <asm/mpspec.h>
+#include <asm/mmzone.h>
#include <asm/setup.h>
#include <asm/arch_hooks.h>
#include <asm/sections.h>
@@ -934,6 +935,24 @@ static void __init parse_cmdline_early (char ** cmdline_p)
}
/*
+ * reservetop=size reserves a hole at the top of the kernel address space which
+ * a hypervisor can load into later. Needed for dynamically loaded hypervisors,
+ * so relocating the fixmap can be done before paging initialization.
+ */
+static int __init parse_reservetop(char *arg)
+{
+ unsigned long address;
+
+ if (!arg)
+ return -EINVAL;
+
+ address = memparse(arg, &arg);
+ reserve_top_address(address);
+ return 0;
+}
+early_param("reservetop", parse_reservetop);
+
+/*
* Callback for efi_memory_walk.
*/
static int __init
@@ -1181,7 +1200,7 @@ static unsigned long __init setup_memory(void)
void __init zone_sizes_init(void)
{
- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+ unsigned long zones_size[MAX_NR_ZONES] = { 0, };
unsigned int max_dma, low;
max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
@@ -1258,7 +1277,7 @@ void __init setup_bootmem_allocator(void)
*/
find_smp_config();
#endif
-
+ numa_kva_reserve();
#ifdef CONFIG_BLK_DEV_INITRD
if (LOADER_TYPE && INITRD_START) {
if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index c10789d7a9d3..465188e2d701 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -634,3 +634,69 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs)
}
}
+/*
+ * this function sends a 'generic call function' IPI to one other CPU
+ * in the system.
+ *
+ * cpu is a standard Linux logical CPU number.
+ */
+static void
+__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
+ int nonatomic, int wait)
+{
+ struct call_data_struct data;
+ int cpus = 1;
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ call_data = &data;
+ wmb();
+ /* Send a message to all other CPUs and wait for them to respond */
+ send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+ cpu_relax();
+
+ if (!wait)
+ return;
+
+ while (atomic_read(&data.finished) != cpus)
+ cpu_relax();
+}
+
+/*
+ * smp_call_function_single - Run a function on another CPU
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @nonatomic: Currently unused.
+ * @wait: If true, wait until function has completed on other CPUs.
+ *
+ * Retrurns 0 on success, else a negative status code.
+ *
+ * Does not return until the remote CPU is nearly ready to execute <func>
+ * or is or has executed.
+ */
+
+int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
+ int nonatomic, int wait)
+{
+ /* prevent preemption and reschedule on another processor */
+ int me = get_cpu();
+ if (cpu == me) {
+ WARN_ON(1);
+ put_cpu();
+ return -EBUSY;
+ }
+ spin_lock_bh(&call_lock);
+ __smp_call_function_single(cpu, func, info, nonatomic, wait);
+ spin_unlock_bh(&call_lock);
+ put_cpu();
+ return 0;
+}
+EXPORT_SYMBOL(smp_call_function_single);
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index f948419c888a..efe07990e7fc 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -642,9 +642,13 @@ static void map_cpu_to_logical_apicid(void)
{
int cpu = smp_processor_id();
int apicid = logical_smp_processor_id();
+ int node = apicid_to_node(apicid);
+
+ if (!node_online(node))
+ node = first_online_node;
cpu_2_logical_apicid[cpu] = apicid;
- map_cpu_to_node(cpu, apicid_to_node(apicid));
+ map_cpu_to_node(cpu, node);
}
static void unmap_cpu_to_logical_apicid(int cpu)
diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c
index b1809c9a0899..83db411b3aa7 100644
--- a/arch/i386/kernel/srat.c
+++ b/arch/i386/kernel/srat.c
@@ -42,7 +42,7 @@
#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8)
static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */
-#define MAX_CHUNKS_PER_NODE 4
+#define MAX_CHUNKS_PER_NODE 3
#define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES)
struct node_memory_chunk_s {
unsigned long start_pfn;
@@ -135,9 +135,6 @@ static void __init parse_memory_affinity_structure (char *sratp)
"enabled and removable" : "enabled" ) );
}
-#if MAX_NR_ZONES != 4
-#error "MAX_NR_ZONES != 4, chunk_to_zone requires review"
-#endif
/* Take a chunk of pages from page frame cstart to cend and count the number
* of pages in each zone, returned via zones[].
*/
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index edd00f6cee37..1302e4ab3c4f 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -270,16 +270,19 @@ void notify_arch_cmos_timer(void)
mod_timer(&sync_cmos_timer, jiffies + 1);
}
-static long clock_cmos_diff, sleep_start;
+static long clock_cmos_diff;
+static unsigned long sleep_start;
static int timer_suspend(struct sys_device *dev, pm_message_t state)
{
/*
* Estimate time zone so that set_time can update the clock
*/
- clock_cmos_diff = -get_cmos_time();
+ unsigned long ctime = get_cmos_time();
+
+ clock_cmos_diff = -ctime;
clock_cmos_diff += get_seconds();
- sleep_start = get_cmos_time();
+ sleep_start = ctime;
return 0;
}
@@ -287,18 +290,29 @@ static int timer_resume(struct sys_device *dev)
{
unsigned long flags;
unsigned long sec;
- unsigned long sleep_length;
-
+ unsigned long ctime = get_cmos_time();
+ long sleep_length = (ctime - sleep_start) * HZ;
+ struct timespec ts;
+
+ if (sleep_length < 0) {
+ printk(KERN_WARNING "CMOS clock skew detected in timer resume!\n");
+ /* The time after the resume must not be earlier than the time
+ * before the suspend or some nasty things will happen
+ */
+ sleep_length = 0;
+ ctime = sleep_start;
+ }
#ifdef CONFIG_HPET_TIMER
if (is_hpet_enabled())
hpet_reenable();
#endif
setup_pit_timer();
- sec = get_cmos_time() + clock_cmos_diff;
- sleep_length = (get_cmos_time() - sleep_start) * HZ;
+
+ sec = ctime + clock_cmos_diff;
+ ts.tv_sec = sec;
+ ts.tv_nsec = 0;
+ do_settimeofday(&ts);
write_seqlock_irqsave(&xtime_lock, flags);
- xtime.tv_sec = sec;
- xtime.tv_nsec = 0;
jiffies_64 += sleep_length;
wall_jiffies += sleep_length;
write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -334,10 +348,11 @@ extern void (*late_time_init)(void);
/* Duplicate of time_init() below, with hpet_enable part added */
static void __init hpet_time_init(void)
{
- xtime.tv_sec = get_cmos_time();
- xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
+ struct timespec ts;
+ ts.tv_sec = get_cmos_time();
+ ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
+
+ do_settimeofday(&ts);
if ((hpet_enable() >= 0) && hpet_use_timer) {
printk("Using HPET for base-timer\n");
@@ -349,6 +364,7 @@ static void __init hpet_time_init(void)
void __init time_init(void)
{
+ struct timespec ts;
#ifdef CONFIG_HPET_TIMER
if (is_hpet_capable()) {
/*
@@ -359,10 +375,10 @@ void __init time_init(void)
return;
}
#endif
- xtime.tv_sec = get_cmos_time();
- xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
+ ts.tv_sec = get_cmos_time();
+ ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
+
+ do_settimeofday(&ts);
time_init_hook();
}
diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c
index 14a1376fedd1..6bf14a4e995e 100644
--- a/arch/i386/kernel/time_hpet.c
+++ b/arch/i386/kernel/time_hpet.c
@@ -301,23 +301,25 @@ int hpet_rtc_timer_init(void)
hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
local_irq_save(flags);
+
cnt = hpet_readl(HPET_COUNTER);
cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
hpet_writel(cnt, HPET_T1_CMP);
hpet_t1_cmp = cnt;
- local_irq_restore(flags);
cfg = hpet_readl(HPET_T1_CFG);
cfg &= ~HPET_TN_PERIODIC;
cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
hpet_writel(cfg, HPET_T1_CFG);
+ local_irq_restore(flags);
+
return 1;
}
static void hpet_rtc_timer_reinit(void)
{
- unsigned int cfg, cnt;
+ unsigned int cfg, cnt, ticks_per_int, lost_ints;
if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
cfg = hpet_readl(HPET_T1_CFG);
@@ -332,10 +334,33 @@ static void hpet_rtc_timer_reinit(void)
hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
/* It is more accurate to use the comparator value than current count.*/
- cnt = hpet_t1_cmp;
- cnt += hpet_tick*HZ/hpet_rtc_int_freq;
- hpet_writel(cnt, HPET_T1_CMP);
- hpet_t1_cmp = cnt;
+ ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq;
+ hpet_t1_cmp += ticks_per_int;
+ hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
+
+ /*
+ * If the interrupt handler was delayed too long, the write above tries
+ * to schedule the next interrupt in the past and the hardware would
+ * not interrupt until the counter had wrapped around.
+ * So we have to check that the comparator wasn't set to a past time.
+ */
+ cnt = hpet_readl(HPET_COUNTER);
+ if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) {
+ lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1;
+ /* Make sure that, even with the time needed to execute
+ * this code, the next scheduled interrupt has been moved
+ * back to the future: */
+ lost_ints++;
+
+ hpet_t1_cmp += lost_ints * ticks_per_int;
+ hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
+
+ if (PIE_on)
+ PIE_count += lost_ints;
+
+ printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n",
+ hpet_rtc_int_freq);
+ }
}
/*
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 7e9edafffd8a..4fcc6690be99 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -313,6 +313,8 @@ void show_registers(struct pt_regs *regs)
*/
if (in_kernel) {
u8 __user *eip;
+ int code_bytes = 64;
+ unsigned char c;
printk("\n" KERN_EMERG "Stack: ");
show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
@@ -320,9 +322,12 @@ void show_registers(struct pt_regs *regs)
printk(KERN_EMERG "Code: ");
eip = (u8 __user *)regs->eip - 43;
- for (i = 0; i < 64; i++, eip++) {
- unsigned char c;
-
+ if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
+ /* try starting at EIP */
+ eip = (u8 __user *)regs->eip;
+ code_bytes = 32;
+ }
+ for (i = 0; i < code_bytes; i++, eip++) {
if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
printk(" Bad EIP value.");
break;
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 2d4f1386e2b1..1e7ac1c44ddc 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
OUTPUT_ARCH(i386)
ENTRY(phys_startup_32)
jiffies = jiffies_64;
+
+PHDRS {
+ text PT_LOAD FLAGS(5); /* R_E */
+ data PT_LOAD FLAGS(7); /* RWE */
+ note PT_NOTE FLAGS(4); /* R__ */
+}
SECTIONS
{
. = __KERNEL_START;
@@ -26,7 +32,7 @@ SECTIONS
KPROBES_TEXT
*(.fixup)
*(.gnu.warning)
- } = 0x9090
+ } :text = 0x9090
_etext = .; /* End of text section */
@@ -48,7 +54,7 @@ SECTIONS
.data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
*(.data)
CONSTRUCTORS
- }
+ } :data
. = ALIGN(4096);
__nosave_begin = .;
@@ -184,4 +190,6 @@ SECTIONS
STABS_DEBUG
DWARF_DEBUG
+
+ NOTES
}
diff --git a/arch/i386/mach-voyager/voyager_thread.c b/arch/i386/mach-voyager/voyager_thread.c
index 50f6de6ff64d..f39887359e8e 100644
--- a/arch/i386/mach-voyager/voyager_thread.c
+++ b/arch/i386/mach-voyager/voyager_thread.c
@@ -130,7 +130,6 @@ thread(void *unused)
init_timer(&wakeup_timer);
sigfillset(&current->blocked);
- current->signal->tty = NULL;
printk(KERN_NOTICE "Voyager starting monitor thread\n");
diff --git a/arch/i386/mm/boot_ioremap.c b/arch/i386/mm/boot_ioremap.c
index 5d44f4f5ff59..4de11f508c3a 100644
--- a/arch/i386/mm/boot_ioremap.c
+++ b/arch/i386/mm/boot_ioremap.c
@@ -29,8 +29,11 @@
*/
#define BOOT_PTE_PTRS (PTRS_PER_PTE*2)
-#define boot_pte_index(address) \
- (((address) >> PAGE_SHIFT) & (BOOT_PTE_PTRS - 1))
+
+static unsigned long boot_pte_index(unsigned long vaddr)
+{
+ return __pa(vaddr) >> PAGE_SHIFT;
+}
static inline boot_pte_t* boot_vaddr_to_pte(void *address)
{
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index 7c392dc553b8..fb5d8b747de4 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -117,7 +117,8 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
void *node_remap_end_vaddr[MAX_NUMNODES];
void *node_remap_alloc_vaddr[MAX_NUMNODES];
-
+static unsigned long kva_start_pfn;
+static unsigned long kva_pages;
/*
* FLAT - support for basic PC memory model with discontig enabled, essentially
* a single node with all available processors in it with a flat
@@ -286,7 +287,6 @@ unsigned long __init setup_memory(void)
{
int nid;
unsigned long system_start_pfn, system_max_low_pfn;
- unsigned long reserve_pages;
/*
* When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -298,14 +298,23 @@ unsigned long __init setup_memory(void)
find_max_pfn();
get_memcfg_numa();
- reserve_pages = calculate_numa_remap_pages();
+ kva_pages = calculate_numa_remap_pages();
/* partially used pages are not usable - thus round upwards */
system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
- system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages;
- printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n",
- reserve_pages, max_low_pfn + reserve_pages);
+ kva_start_pfn = find_max_low_pfn() - kva_pages;
+
+#ifdef CONFIG_BLK_DEV_INITRD
+ /* Numa kva area is below the initrd */
+ if (LOADER_TYPE && INITRD_START)
+ kva_start_pfn = PFN_DOWN(INITRD_START) - kva_pages;
+#endif
+ kva_start_pfn -= kva_start_pfn & (PTRS_PER_PTE-1);
+
+ system_max_low_pfn = max_low_pfn = find_max_low_pfn();
+ printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n",
+ kva_start_pfn, max_low_pfn);
printk("max_pfn = %ld\n", max_pfn);
#ifdef CONFIG_HIGHMEM
highstart_pfn = highend_pfn = max_pfn;
@@ -323,7 +332,7 @@ unsigned long __init setup_memory(void)
(ulong) pfn_to_kaddr(max_low_pfn));
for_each_online_node(nid) {
node_remap_start_vaddr[nid] = pfn_to_kaddr(
- highstart_pfn + node_remap_offset[nid]);
+ kva_start_pfn + node_remap_offset[nid]);
/* Init the node remap allocator */
node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
(node_remap_size[nid] * PAGE_SIZE);
@@ -338,7 +347,6 @@ unsigned long __init setup_memory(void)
}
printk("High memory starts at vaddr %08lx\n",
(ulong) pfn_to_kaddr(highstart_pfn));
- vmalloc_earlyreserve = reserve_pages * PAGE_SIZE;
for_each_online_node(nid)
find_max_pfn_node(nid);
@@ -348,13 +356,18 @@ unsigned long __init setup_memory(void)
return max_low_pfn;
}
+void __init numa_kva_reserve(void)
+{
+ reserve_bootmem(PFN_PHYS(kva_start_pfn),PFN_PHYS(kva_pages));
+}
+
void __init zone_sizes_init(void)
{
int nid;
for_each_online_node(nid) {
- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+ unsigned long zones_size[MAX_NR_ZONES] = {0, };
unsigned long *zholes_size;
unsigned int max_dma;
@@ -409,7 +422,7 @@ void __init set_highmem_pages_init(int bad_ppro)
zone_end_pfn = zone_start_pfn + zone->spanned_pages;
printk("Initializing %s for node %d (%08lx:%08lx)\n",
- zone->name, zone->zone_pgdat->node_id,
+ zone->name, zone_to_nid(zone),
zone_start_pfn, zone_end_pfn);
for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) {
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 89e8486aac34..efd0bcdac65d 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -629,6 +629,48 @@ void __init mem_init(void)
(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
);
+#if 1 /* double-sanity-check paranoia */
+ printk("virtual kernel memory layout:\n"
+ " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
+#ifdef CONFIG_HIGHMEM
+ " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
+#endif
+ " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
+ " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
+ " .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
+ " .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
+ " .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
+ FIXADDR_START, FIXADDR_TOP,
+ (FIXADDR_TOP - FIXADDR_START) >> 10,
+
+#ifdef CONFIG_HIGHMEM
+ PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
+ (LAST_PKMAP*PAGE_SIZE) >> 10,
+#endif
+
+ VMALLOC_START, VMALLOC_END,
+ (VMALLOC_END - VMALLOC_START) >> 20,
+
+ (unsigned long)__va(0), (unsigned long)high_memory,
+ ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
+
+ (unsigned long)&__init_begin, (unsigned long)&__init_end,
+ ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10,
+
+ (unsigned long)&_etext, (unsigned long)&_edata,
+ ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
+
+ (unsigned long)&_text, (unsigned long)&_etext,
+ ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
+
+#ifdef CONFIG_HIGHMEM
+ BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
+ BUG_ON(VMALLOC_END > PKMAP_BASE);
+#endif
+ BUG_ON(VMALLOC_START > VMALLOC_END);
+ BUG_ON((unsigned long)high_memory > VMALLOC_START);
+#endif /* double-sanity-check paranoia */
+
#ifdef CONFIG_X86_PAE
if (!cpu_has_pae)
panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
@@ -657,7 +699,7 @@ void __init mem_init(void)
int arch_add_memory(int nid, u64 start, u64 size)
{
struct pglist_data *pgdata = &contig_page_data;
- struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
+ struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index bd98768d8764..10126e3f8174 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -12,6 +12,7 @@
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
+#include <linux/module.h>
#include <asm/system.h>
#include <asm/pgtable.h>
@@ -60,7 +61,9 @@ void show_mem(void)
printk(KERN_INFO "%lu pages writeback\n",
global_page_state(NR_WRITEBACK));
printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
- printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB));
+ printk(KERN_INFO "%lu pages slab\n",
+ global_page_state(NR_SLAB_RECLAIMABLE) +
+ global_page_state(NR_SLAB_UNRECLAIMABLE));
printk(KERN_INFO "%lu pages pagetables\n",
global_page_state(NR_PAGETABLE));
}
@@ -137,6 +140,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
__flush_tlb_one(vaddr);
}
+static int fixmaps;
+#ifndef CONFIG_COMPAT_VDSO
+unsigned long __FIXADDR_TOP = 0xfffff000;
+EXPORT_SYMBOL(__FIXADDR_TOP);
+#endif
+
void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
{
unsigned long address = __fix_to_virt(idx);
@@ -146,6 +155,25 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
return;
}
set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
+ fixmaps++;
+}
+
+/**
+ * reserve_top_address - reserves a hole in the top of kernel address space
+ * @reserve - size of hole to reserve
+ *
+ * Can be used to relocate the fixmap area and poke a hole in the top
+ * of kernel address space to make room for a hypervisor.
+ */
+void reserve_top_address(unsigned long reserve)
+{
+ BUG_ON(fixmaps > 0);
+#ifdef CONFIG_COMPAT_VDSO
+ BUG_ON(reserve != 0);
+#else
+ __FIXADDR_TOP = -reserve - PAGE_SIZE;
+ __VMALLOC_RESERVE += reserve;
+#endif
}
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
diff --git a/arch/i386/power/swsusp.S b/arch/i386/power/swsusp.S
index c893b897217f..8a2b50a0aaad 100644
--- a/arch/i386/power/swsusp.S
+++ b/arch/i386/power/swsusp.S
@@ -32,7 +32,7 @@ ENTRY(swsusp_arch_resume)
movl $swsusp_pg_dir-__PAGE_OFFSET, %ecx
movl %ecx, %cr3
- movl pagedir_nosave, %edx
+ movl restore_pblist, %edx
.p2align 4,,7
copy_loop: