summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndi Kleen <ak@suse.de>2008-01-30 13:33:17 +0100
committerIngo Molnar <mingo@elte.hu>2008-01-30 13:33:17 +0100
commit751752789162fde69474edfa15935d0a77c0bc17 (patch)
tree43eef77784989bc25979da1cc128e31fc46b3cea
parentedcd81199dbad5db11ae91b507cec1d46dd94a49 (diff)
downloadlwn-751752789162fde69474edfa15935d0a77c0bc17.tar.gz
lwn-751752789162fde69474edfa15935d0a77c0bc17.zip
x86: replace hard coded reservations in 64-bit early boot code with dynamic table
On x86-64 there are several memory allocations before bootmem. To avoid them stomping on each other they used to be all hard coded in bad_area(). Replace this with an array that is filled as needed. This cleans up the code considerably and allows to expand its use. Cc: peterz@infradead.org Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--arch/x86/kernel/e820_64.c95
-rw-r--r--arch/x86/kernel/head64.c48
-rw-r--r--arch/x86/kernel/setup_64.c67
-rw-r--r--arch/x86/mm/init_64.c5
-rw-r--r--arch/x86/mm/numa_64.c1
-rw-r--r--include/asm-x86/e820_64.h4
-rw-r--r--include/asm-x86/proto.h2
7 files changed, 110 insertions, 112 deletions
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 07cfaae7ab07..f8b7bebb4344 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -47,56 +47,65 @@ unsigned long end_pfn_map;
*/
static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
-/* Check for some hardcoded bad areas that early boot is not allowed to touch */
-static inline int bad_addr(unsigned long *addrp, unsigned long size)
-{
- unsigned long addr = *addrp, last = addr + size;
+/*
+ * Early reserved memory areas.
+ */
+#define MAX_EARLY_RES 20
+
+struct early_res {
+ unsigned long start, end;
+};
+static struct early_res early_res[MAX_EARLY_RES] __initdata = {
+ { 0, PAGE_SIZE }, /* BIOS data page */
+#ifdef CONFIG_SMP
+ { SMP_TRAMPOLINE_BASE, SMP_TRAMPOLINE_BASE + 2*PAGE_SIZE },
+#endif
+ {}
+};
- /* various gunk below that needed for SMP startup */
- if (addr < 0x8000) {
- *addrp = PAGE_ALIGN(0x8000);
- return 1;
+void __init reserve_early(unsigned long start, unsigned long end)
+{
+ int i;
+ struct early_res *r;
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+ r = &early_res[i];
+ if (end > r->start && start < r->end)
+ panic("Duplicated early reservation %lx-%lx\n",
+ start, end);
}
+ if (i >= MAX_EARLY_RES)
+ panic("Too many early reservations");
+ r = &early_res[i];
+ r->start = start;
+ r->end = end;
+}
- /* direct mapping tables of the kernel */
- if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
- *addrp = PAGE_ALIGN(table_end << PAGE_SHIFT);
- return 1;
+void __init early_res_to_bootmem(void)
+{
+ int i;
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+ struct early_res *r = &early_res[i];
+ reserve_bootmem_generic(r->start, r->end - r->start);
}
+}
- /* initrd */
-#ifdef CONFIG_BLK_DEV_INITRD
- if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
- unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
- unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
- unsigned long ramdisk_end = ramdisk_image+ramdisk_size;
-
- if (last >= ramdisk_image && addr < ramdisk_end) {
- *addrp = PAGE_ALIGN(ramdisk_end);
- return 1;
+/* Check for already reserved areas */
+static inline int bad_addr(unsigned long *addrp, unsigned long size)
+{
+ int i;
+ unsigned long addr = *addrp, last;
+ int changed = 0;
+again:
+ last = addr + size;
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+ struct early_res *r = &early_res[i];
+ if (last >= r->start && addr < r->end) {
+ *addrp = addr = r->end;
+ changed = 1;
+ goto again;
}
}
-#endif
- /* kernel code */
- if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
- *addrp = PAGE_ALIGN(__pa_symbol(&_end));
- return 1;
- }
-
- if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
- *addrp = PAGE_ALIGN(ebda_addr + ebda_size);
- return 1;
- }
-
-#ifdef CONFIG_NUMA
- /* NUMA memory to node map */
- if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) {
- *addrp = nodemap_addr + nodemap_size;
- return 1;
- }
-#endif
- /* XXX ramdisk image here? */
- return 0;
+ return changed;
}
/*
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 87e031d4abf1..58438bafedca 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -21,6 +21,7 @@
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
+#include <asm/e820.h>
static void __init zap_identity_mappings(void)
{
@@ -48,6 +49,35 @@ static void __init copy_bootdata(char *real_mode_data)
}
}
+#define EBDA_ADDR_POINTER 0x40E
+
+static __init void reserve_ebda(void)
+{
+ unsigned ebda_addr, ebda_size;
+
+ /*
+ * there is a real-mode segmented pointer pointing to the
+ * 4K EBDA area at 0x40E
+ */
+ ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
+ ebda_addr <<= 4;
+
+ if (!ebda_addr)
+ return;
+
+ ebda_size = *(unsigned short *)__va(ebda_addr);
+
+ /* Round EBDA up to pages */
+ if (ebda_size == 0)
+ ebda_size = 1;
+ ebda_size <<= 10;
+ ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
+ if (ebda_size > 64*1024)
+ ebda_size = 64*1024;
+
+ reserve_early(ebda_addr, ebda_addr + ebda_size);
+}
+
void __init x86_64_start_kernel(char * real_mode_data)
{
int i;
@@ -75,5 +105,23 @@ void __init x86_64_start_kernel(char * real_mode_data)
pda_init(0);
copy_bootdata(__va(real_mode_data));
+ reserve_early(__pa_symbol(&_text), __pa_symbol(&_end));
+
+ /* Reserve INITRD */
+ if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
+ unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
+ unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
+ unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
+ reserve_early(ramdisk_image, ramdisk_end);
+ }
+
+ reserve_ebda();
+
+ /*
+ * At this point everything still needed from the boot loader
+ * or BIOS or kernel text should be early reserved or marked not
+ * RAM in e820. All other memory is free game.
+ */
+
start_kernel();
}
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 4a3f00b49236..6cbd15625dce 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -245,41 +245,6 @@ static inline void __init reserve_crashkernel(void)
{}
#endif
-#define EBDA_ADDR_POINTER 0x40E
-
-unsigned __initdata ebda_addr;
-unsigned __initdata ebda_size;
-
-static void __init discover_ebda(void)
-{
- /*
- * there is a real-mode segmented pointer pointing to the
- * 4K EBDA area at 0x40E
- */
- ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
- /*
- * There can be some situations, like paravirtualized guests,
- * in which there is no available ebda information. In such
- * case, just skip it
- */
- if (!ebda_addr) {
- ebda_size = 0;
- return;
- }
-
- ebda_addr <<= 4;
-
- ebda_size = *(unsigned short *)__va(ebda_addr);
-
- /* Round EBDA up to pages */
- if (ebda_size == 0)
- ebda_size = 1;
- ebda_size <<= 10;
- ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
- if (ebda_size > 64*1024)
- ebda_size = 64*1024;
-}
-
/* Overridden in paravirt.c if CONFIG_PARAVIRT */
void __attribute__((weak)) __init memory_setup(void)
{
@@ -349,8 +314,6 @@ void __init setup_arch(char **cmdline_p)
check_efer();
- discover_ebda();
-
init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
if (efi_enabled)
efi_init();
@@ -397,33 +360,7 @@ void __init setup_arch(char **cmdline_p)
contig_initmem_init(0, end_pfn);
#endif
- /* Reserve direct mapping */
- reserve_bootmem_generic(table_start << PAGE_SHIFT,
- (table_end - table_start) << PAGE_SHIFT);
-
- /* reserve kernel */
- reserve_bootmem_generic(__pa_symbol(&_text),
- __pa_symbol(&_end) - __pa_symbol(&_text));
-
- /*
- * reserve physical page 0 - it's a special BIOS page on many boxes,
- * enabling clean reboots, SMP operation, laptop functions.
- */
- reserve_bootmem_generic(0, PAGE_SIZE);
-
- /* reserve ebda region */
- if (ebda_addr)
- reserve_bootmem_generic(ebda_addr, ebda_size);
-#ifdef CONFIG_NUMA
- /* reserve nodemap region */
- if (nodemap_addr)
- reserve_bootmem_generic(nodemap_addr, nodemap_size);
-#endif
-
-#ifdef CONFIG_SMP
- /* Reserve SMP trampoline */
- reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE);
-#endif
+ early_res_to_bootmem();
#ifdef CONFIG_ACPI_SLEEP
/*
@@ -453,6 +390,8 @@ void __init setup_arch(char **cmdline_p)
initrd_start = ramdisk_image + PAGE_OFFSET;
initrd_end = initrd_start+ramdisk_size;
} else {
+ /* Assumes everything on node 0 */
+ free_bootmem(ramdisk_image, ramdisk_size);
printk(KERN_ERR "initrd extends beyond end of memory "
"(0x%08lx > 0x%08lx)\ndisabling initrd\n",
ramdisk_end, end_of_mem);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 05f12c527b02..8198840c3dcb 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -176,7 +176,8 @@ __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
set_pte_phys(address, phys, prot);
}
-unsigned long __meminitdata table_start, table_end;
+static unsigned long __initdata table_start;
+static unsigned long __meminitdata table_end;
static __meminit void *alloc_low_page(unsigned long *phys)
{
@@ -387,6 +388,8 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
if (!after_bootmem)
mmu_cr4_features = read_cr4();
__flush_tlb_all();
+
+ reserve_early(table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
}
#ifndef CONFIG_NUMA
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 37d429beba96..5d24dc1ec237 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -102,6 +102,7 @@ static int __init allocate_cachealigned_memnodemap(void)
}
pad_addr = (nodemap_addr + pad) & ~pad;
memnodemap = phys_to_virt(pad_addr);
+ reserve_early(nodemap_addr, nodemap_addr + nodemap_size);
printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
nodemap_addr, nodemap_addr + nodemap_size);
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
index ff36f434f00b..51e4170f9ca5 100644
--- a/include/asm-x86/e820_64.h
+++ b/include/asm-x86/e820_64.h
@@ -41,8 +41,8 @@ extern void finish_e820_parsing(void);
extern struct e820map e820;
extern void update_e820(void);
-extern unsigned ebda_addr, ebda_size;
-extern unsigned long nodemap_addr, nodemap_size;
+extern void reserve_early(unsigned long start, unsigned long end);
+extern void early_res_to_bootmem(void);
#endif/*!__ASSEMBLY__*/
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index 3c6fb89f1120..68563c0709ac 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -22,8 +22,6 @@ extern void syscall32_cpu_init(void);
extern void check_efer(void);
-extern unsigned long table_start, table_end;
-
extern int reboot_force;
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);