diff options
author | Linus Torvalds <torvalds@woody.osdl.org> | 2006-12-07 15:39:22 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.osdl.org> | 2006-12-07 15:39:22 -0800 |
commit | 6ee7e78e7c78d871409ad4df30551c9355be7d0e (patch) | |
tree | 84deeea72d7234d0b5652483b11760f394ae1131 | |
parent | 7f3af60e5a444b287d740a84998a8f480645dadf (diff) | |
parent | 52fd91088bcbaea5ab441d09d39c21eb684e54ea (diff) | |
download | lwn-6ee7e78e7c78d871409ad4df30551c9355be7d0e.tar.gz lwn-6ee7e78e7c78d871409ad4df30551c9355be7d0e.zip |
Merge branch 'release' of master.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6
* 'release' of master.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6:
[IA64] replace kmalloc+memset with kzalloc
[IA64] resolve name clash by renaming is_available_memory()
[IA64] Need export for csum_ipv6_magic
[IA64] Fix DISCONTIGMEM without VIRTUAL_MEM_MAP
[PATCH] Add support for type argument in PAL_GET_PSTATE
[IA64] tidy up return value of ip_fast_csum
[IA64] implement csum_ipv6_magic for ia64.
[IA64] More Itanium PAL spec updates
[IA64] Update processor_info features
[IA64] Add se bit to Processor State Parameter structure
[IA64] Add dp bit to cache and bus check structs
[IA64] SN: Correctly update smp_affinty mask
[IA64] sparse cleanups
[IA64] IA64 Kexec/kdump
34 files changed, 1146 insertions, 63 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 683b12c6f76c..75d839715b2f 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -434,6 +434,29 @@ config IA64_ESI source "drivers/sn/Kconfig" +config KEXEC + bool "kexec system call (EXPERIMENTAL)" + depends on EXPERIMENTAL && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU) + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is indepedent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similiarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shutdown, so do not be surprised if this code does not + initially work for you. It may help to enable device hotplugging + support. As of this writing the exact hardware interface is + strongly in flux, so no good recommendation can be made. + +config CRASH_DUMP + bool "kernel crash dumps (EXPERIMENTAL)" + depends on EXPERIMENTAL && IA64_MCA_RECOVERY && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU) + help + Generate crash dump after being started by kexec. + source "drivers/firmware/Kconfig" source "fs/Kconfig.binfmt" diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 14691cda05c3..ce49fe3a3b56 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -1672,15 +1672,13 @@ ioc_sac_init(struct ioc *ioc) * SAC (single address cycle) addressable, so allocate a * pseudo-device to enforce that. */ - sac = kmalloc(sizeof(*sac), GFP_KERNEL); + sac = kzalloc(sizeof(*sac), GFP_KERNEL); if (!sac) panic(PFX "Couldn't allocate struct pci_dev"); - memset(sac, 0, sizeof(*sac)); - controller = kmalloc(sizeof(*controller), GFP_KERNEL); + controller = kzalloc(sizeof(*controller), GFP_KERNEL); if (!controller) panic(PFX "Couldn't allocate struct pci_controller"); - memset(controller, 0, sizeof(*controller)); controller->iommu = ioc; sac->sysdata = controller; @@ -1737,12 +1735,10 @@ ioc_init(u64 hpa, void *handle) struct ioc *ioc; struct ioc_iommu *info; - ioc = kmalloc(sizeof(*ioc), GFP_KERNEL); + ioc = kzalloc(sizeof(*ioc), GFP_KERNEL); if (!ioc) return NULL; - memset(ioc, 0, sizeof(*ioc)); - ioc->next = ioc_list; ioc_list = ioc; diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index b62f0c4d2c7c..1f16ebb9a800 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -684,12 +684,11 @@ static int get_async_struct(int line, struct async_struct **ret_info) *ret_info = sstate->info; return 0; } - info = kmalloc(sizeof(struct async_struct), GFP_KERNEL); + info = kzalloc(sizeof(struct async_struct), GFP_KERNEL); if (!info) { sstate->count--; return -ENOMEM; } - memset(info, 0, sizeof(struct async_struct)); init_waitqueue_head(&info->open_wait); init_waitqueue_head(&info->close_wait); init_waitqueue_head(&info->delta_msr_wait); diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index cfa099b04cda..8ae384eb5357 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_IA64_CYCLONE) += cyclone.o obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_PCI_MSI) += msi_ia64.o diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c index 86faf221a070..088f130197ae 100644 --- a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c +++ b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c @@ -68,7 +68,8 @@ processor_get_pstate ( dprintk("processor_get_pstate\n"); - retval = ia64_pal_get_pstate(&pstate_index); + retval = ia64_pal_get_pstate(&pstate_index, + PAL_GET_PSTATE_TYPE_INSTANT); *value = (u32) pstate_index; if (retval) @@ -91,7 +92,7 @@ extract_clock ( dprintk("extract_clock\n"); for (i = 0; i < data->acpi_data.state_count; i++) { - if (value >= data->acpi_data.states[i].control) + if (value == data->acpi_data.states[i].status) return data->acpi_data.states[i].core_frequency; } return data->acpi_data.states[i-1].core_frequency; @@ -117,11 +118,7 @@ processor_get_freq ( goto migrate_end; } - /* - * processor_get_pstate gets the average frequency since the - * last get. So, do two PAL_get_freq()... - */ - ret = processor_get_pstate(&value); + /* processor_get_pstate gets the instantaneous frequency */ ret = processor_get_pstate(&value); if (ret) { diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c new file mode 100644 index 000000000000..0aabedf95dad --- /dev/null +++ b/arch/ia64/kernel/crash.c @@ -0,0 +1,245 @@ +/* + * arch/ia64/kernel/crash.c + * + * Architecture specific (ia64) functions for kexec based crash dumps. + * + * Created by: Khalid Aziz <khalid.aziz@hp.com> + * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. + * Copyright (C) 2005 Intel Corp Zou Nan hai <nanhai.zou@intel.com> + * + */ +#include <linux/smp.h> +#include <linux/delay.h> +#include <linux/crash_dump.h> +#include <linux/bootmem.h> +#include <linux/kexec.h> +#include <linux/elfcore.h> +#include <linux/sysctl.h> +#include <linux/init.h> + +#include <asm/kdebug.h> +#include <asm/mca.h> +#include <asm/uaccess.h> + +int kdump_status[NR_CPUS]; +atomic_t kdump_cpu_freezed; +atomic_t kdump_in_progress; +int kdump_on_init = 1; +ssize_t +copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, int userbuf) +{ + void *vaddr; + + if (!csize) + return 0; + vaddr = __va(pfn<<PAGE_SHIFT); + if (userbuf) { + if (copy_to_user(buf, (vaddr + offset), csize)) { + return -EFAULT; + } + } else + memcpy(buf, (vaddr + offset), csize); + return csize; +} + +static inline Elf64_Word +*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data, + size_t data_len) +{ + struct elf_note *note = (struct elf_note *)buf; + note->n_namesz = strlen(name) + 1; + note->n_descsz = data_len; + note->n_type = type; + buf += (sizeof(*note) + 3)/4; + memcpy(buf, name, note->n_namesz); + buf += (note->n_namesz + 3)/4; + memcpy(buf, data, data_len); + buf += (data_len + 3)/4; + return buf; +} + +static void +final_note(void *buf) +{ + memset(buf, 0, sizeof(struct elf_note)); +} + +extern void ia64_dump_cpu_regs(void *); + +static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus); + +void +crash_save_this_cpu() +{ + void *buf; + unsigned long cfm, sof, sol; + + int cpu = smp_processor_id(); + struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu); + + elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg); + memset(prstatus, 0, sizeof(*prstatus)); + prstatus->pr_pid = current->pid; + + ia64_dump_cpu_regs(dst); + cfm = dst[43]; + sol = (cfm >> 7) & 0x7f; + sof = cfm & 0x7f; + dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46], + sof - sol); + + buf = (u64 *) per_cpu_ptr(crash_notes, cpu); + if (!buf) + return; + buf = append_elf_note(buf, "CORE", NT_PRSTATUS, prstatus, + sizeof(*prstatus)); + final_note(buf); +} + +static int +kdump_wait_cpu_freeze(void) +{ + int cpu_num = num_online_cpus() - 1; + int timeout = 1000; + while(timeout-- > 0) { + if (atomic_read(&kdump_cpu_freezed) == cpu_num) + return 0; + udelay(1000); + } + return 1; +} + +void +machine_crash_shutdown(struct pt_regs *pt) +{ + /* This function is only called after the system + * has paniced or is otherwise in a critical state. + * The minimum amount of code to allow a kexec'd kernel + * to run successfully needs to happen here. + * + * In practice this means shooting down the other cpus in + * an SMP system. + */ + kexec_disable_iosapic(); +#ifdef CONFIG_SMP + kdump_smp_send_stop(); + if (kdump_wait_cpu_freeze() && kdump_on_init) { + //not all cpu response to IPI, send INIT to freeze them + kdump_smp_send_init(); + } +#endif +} + +static void +machine_kdump_on_init(void) +{ + local_irq_disable(); + kexec_disable_iosapic(); + machine_kexec(ia64_kimage); +} + +void +kdump_cpu_freeze(struct unw_frame_info *info, void *arg) +{ + int cpuid; + local_irq_disable(); + cpuid = smp_processor_id(); + crash_save_this_cpu(); + current->thread.ksp = (__u64)info->sw - 16; + atomic_inc(&kdump_cpu_freezed); + kdump_status[cpuid] = 1; + mb(); + if (cpuid == 0) { + for (;;) + cpu_relax(); + } else + ia64_jump_to_sal(&sal_boot_rendez_state[cpuid]); +} + +static int +kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) +{ + struct ia64_mca_notify_die *nd; + struct die_args *args = data; + + if (!kdump_on_init) + return NOTIFY_DONE; + + if (val != DIE_INIT_MONARCH_ENTER && + val != DIE_INIT_SLAVE_ENTER && + val != DIE_MCA_RENDZVOUS_LEAVE && + val != DIE_MCA_MONARCH_LEAVE) + return NOTIFY_DONE; + + nd = (struct ia64_mca_notify_die *)args->err; + /* Reason code 1 means machine check rendezous*/ + if ((val == DIE_INIT_MONARCH_ENTER || DIE_INIT_SLAVE_ENTER) && + nd->sos->rv_rc == 1) + return NOTIFY_DONE; + + switch (val) { + case DIE_INIT_MONARCH_ENTER: + machine_kdump_on_init(); + break; + case DIE_INIT_SLAVE_ENTER: + unw_init_running(kdump_cpu_freeze, NULL); + break; + case DIE_MCA_RENDZVOUS_LEAVE: + if (atomic_read(&kdump_in_progress)) + unw_init_running(kdump_cpu_freeze, NULL); + break; + case DIE_MCA_MONARCH_LEAVE: + /* die_register->signr indicate if MCA is recoverable */ + if (!args->signr) + machine_kdump_on_init(); + break; + } + return NOTIFY_DONE; +} + +#ifdef CONFIG_SYSCTL +static ctl_table kdump_on_init_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "kdump_on_init", + .data = &kdump_on_init, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0 } +}; + +static ctl_table sys_table[] = { + { + .ctl_name = CTL_KERN, + .procname = "kernel", + .mode = 0555, + .child = kdump_on_init_table, + }, + { .ctl_name = 0 } +}; +#endif + +static int +machine_crash_setup(void) +{ + char *from = strstr(saved_command_line, "elfcorehdr="); + static struct notifier_block kdump_init_notifier_nb = { + .notifier_call = kdump_init_notifier, + }; + int ret; + if (from) + elfcorehdr_addr = memparse(from+11, &from); + saved_max_pfn = (unsigned long)-1; + if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0) + return ret; +#ifdef CONFIG_SYSCTL + register_sysctl_table(sys_table, 0); +#endif + return 0; +} + +__initcall(machine_crash_setup); + diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index bb8770a177b5..0b25a7d4e1e4 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -26,6 +26,7 @@ #include <linux/types.h> #include <linux/time.h> #include <linux/efi.h> +#include <linux/kexec.h> #include <asm/io.h> #include <asm/kregs.h> @@ -41,7 +42,7 @@ extern efi_status_t efi_call_phys (void *, ...); struct efi efi; EXPORT_SYMBOL(efi); static efi_runtime_services_t *runtime; -static unsigned long mem_limit = ~0UL, max_addr = ~0UL; +static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL; #define efi_call_virt(f, args...) (*(f))(args) @@ -224,7 +225,7 @@ efi_gettimeofday (struct timespec *ts) } static int -is_available_memory (efi_memory_desc_t *md) +is_memory_available (efi_memory_desc_t *md) { if (!(md->attribute & EFI_MEMORY_WB)) return 0; @@ -421,6 +422,8 @@ efi_init (void) mem_limit = memparse(cp + 4, &cp); } else if (memcmp(cp, "max_addr=", 9) == 0) { max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp)); + } else if (memcmp(cp, "min_addr=", 9) == 0) { + min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp)); } else { while (*cp != ' ' && *cp) ++cp; @@ -428,6 +431,8 @@ efi_init (void) ++cp; } } + if (min_addr != 0UL) + printk(KERN_INFO "Ignoring memory below %luMB\n", min_addr >> 20); if (max_addr != ~0UL) printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20); @@ -887,14 +892,15 @@ find_memmap_space (void) } contig_high = GRANULEROUNDDOWN(contig_high); } - if (!is_available_memory(md) || md->type == EFI_LOADER_DATA) + if (!is_memory_available(md) || md->type == EFI_LOADER_DATA) continue; /* Round ends inward to granule boundaries */ as = max(contig_low, md->phys_addr); ae = min(contig_high, efi_md_end(md)); - /* keep within max_addr= command line arg */ + /* keep within max_addr= and min_addr= command line arg */ + as = max(as, min_addr); ae = min(ae, max_addr); if (ae <= as) continue; @@ -962,7 +968,7 @@ efi_memmap_init(unsigned long *s, unsigned long *e) } contig_high = GRANULEROUNDDOWN(contig_high); } - if (!is_available_memory(md)) + if (!is_memory_available(md)) continue; /* @@ -1004,7 +1010,8 @@ efi_memmap_init(unsigned long *s, unsigned long *e) } else ae = efi_md_end(md); - /* keep within max_addr= command line arg */ + /* keep within max_addr= and min_addr= command line arg */ + as = max(as, min_addr); ae = min(ae, max_addr); if (ae <= as) continue; @@ -1116,6 +1123,58 @@ efi_initialize_iomem_resources(struct resource *code_resource, */ insert_resource(res, code_resource); insert_resource(res, data_resource); +#ifdef CONFIG_KEXEC + insert_resource(res, &efi_memmap_res); + insert_resource(res, &boot_param_res); + if (crashk_res.end > crashk_res.start) + insert_resource(res, &crashk_res); +#endif } } } + +#ifdef CONFIG_KEXEC +/* find a block of memory aligned to 64M exclude reserved regions + rsvd_regions are sorted + */ +unsigned long +kdump_find_rsvd_region (unsigned long size, + struct rsvd_region *r, int n) +{ + int i; + u64 start, end; + u64 alignment = 1UL << _PAGE_SIZE_64M; + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (!efi_wb(md)) + continue; + start = ALIGN(md->phys_addr, alignment); + end = efi_md_end(md); + for (i = 0; i < n; i++) { + if (__pa(r[i].start) >= start && __pa(r[i].end) < end) { + if (__pa(r[i].start) > start + size) + return start; + start = ALIGN(__pa(r[i].end), alignment); + if (i < n-1 && __pa(r[i+1].start) < start + size) + continue; + else + break; + } + } + if (end > start + size) + return start; + } + + printk(KERN_WARNING "Cannot reserve 0x%lx byte of memory for crashdump\n", + size); + return ~0UL; +} +#endif diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 3390b7c5a63f..15234ed3a341 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1575,7 +1575,7 @@ sys_call_table: data8 sys_mq_timedreceive // 1265 data8 sys_mq_notify data8 sys_mq_getsetattr - data8 sys_ni_syscall // reserved for kexec_load + data8 sys_kexec_load data8 sys_ni_syscall // reserved for vserver data8 sys_waitid // 1270 data8 sys_add_key diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 879c1817bd1c..bd17190bebb6 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -14,6 +14,7 @@ EXPORT_SYMBOL(strlen); #include <asm/checksum.h> EXPORT_SYMBOL(ip_fast_csum); /* hand-coded assembly */ +EXPORT_SYMBOL(csum_ipv6_magic); #include <asm/semaphore.h> EXPORT_SYMBOL(__down); diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 60d64950e3c2..0fc5fb7865cf 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -288,6 +288,27 @@ nop (unsigned int irq) /* do nothing... */ } + +#ifdef CONFIG_KEXEC +void +kexec_disable_iosapic(void) +{ + struct iosapic_intr_info *info; + struct iosapic_rte_info *rte; + u8 vec = 0; + for (info = iosapic_intr_info; info < + iosapic_intr_info + IA64_NUM_VECTORS; ++info, ++vec) { + list_for_each_entry(rte, &info->rtes, + rte_list) { + iosapic_write(rte->addr, + IOSAPIC_RTE_LOW(rte->rte_index), + IOSAPIC_MASK|vec); + iosapic_eoi(rte->addr, vec); + } + } +} +#endif + static void mask_irq (unsigned int irq) { diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 4d592ee9300b..76e778951e20 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -851,7 +851,7 @@ static void ia64_get_bsp_cfm(struct unw_frame_info *info, void *arg) return; } } while (unw_unwind(info) >= 0); - lp->bsp = 0; + lp->bsp = NULL; lp->cfm = 0; return; } diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c new file mode 100644 index 000000000000..468233fa2cee --- /dev/null +++ b/arch/ia64/kernel/machine_kexec.c @@ -0,0 +1,133 @@ +/* + * arch/ia64/kernel/machine_kexec.c + * + * Handle transition of Linux booting another kernel + * Copyright (C) 2005 Hewlett-Packard Development Comapny, L.P. + * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com> + * Copyright (C) 2006 Intel Corp, Zou Nan hai <nanhai.zou@intel.com> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/mm.h> +#include <linux/kexec.h> +#include <linux/cpu.h> +#include <linux/irq.h> +#include <asm/mmu_context.h> +#include <asm/setup.h> +#include <asm/delay.h> +#include <asm/meminit.h> + +typedef void (*relocate_new_kernel_t)(unsigned long, unsigned long, + struct ia64_boot_param *, unsigned long); + +struct kimage *ia64_kimage; + +struct resource efi_memmap_res = { + .name = "EFI Memory Map", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +struct resource boot_param_res = { + .name = "Boot parameter", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + + +/* + * Do what every setup is needed on image and the + * reboot code buffer to allow us to avoid allocations + * later. + */ +int machine_kexec_prepare(struct kimage *image) +{ + void *control_code_buffer; + const unsigned long *func; + + func = (unsigned long *)&relocate_new_kernel; + /* Pre-load control code buffer to minimize work in kexec path */ + control_code_buffer = page_address(image->control_code_page); + memcpy((void *)control_code_buffer, (const void *)func[0], + relocate_new_kernel_size); + flush_icache_range((unsigned long)control_code_buffer, + (unsigned long)control_code_buffer + relocate_new_kernel_size); + ia64_kimage = image; + + return 0; +} + +void machine_kexec_cleanup(struct kimage *image) +{ +} + +void machine_shutdown(void) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + cpu_down(cpu); + } + kexec_disable_iosapic(); +} + +/* + * Do not allocate memory (or fail in any way) in machine_kexec(). + * We are past the point of no return, committed to rebooting now. + */ +extern void *efi_get_pal_addr(void); +static void ia64_machine_kexec(struct unw_frame_info *info, void *arg) +{ + struct kimage *image = arg; + relocate_new_kernel_t rnk; + void *pal_addr = efi_get_pal_addr(); + unsigned long code_addr = (unsigned long)page_address(image->control_code_page); + unsigned long vector; + int ii; + + if (image->type == KEXEC_TYPE_CRASH) { + crash_save_this_cpu(); + current->thread.ksp = (__u64)info->sw - 16; + } + + /* Interrupts aren't acceptable while we reboot */ + local_irq_disable(); + + /* Mask CMC and Performance Monitor interrupts */ + ia64_setreg(_IA64_REG_CR_PMV, 1 << 16); + ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16); + + /* Mask ITV and Local Redirect Registers */ + ia64_set_itv(1 << 16); + ia64_set_lrr0(1 << 16); + ia64_set_lrr1(1 << 16); + + /* terminate possible nested in-service interrupts */ + for (ii = 0; ii < 16; ii++) + ia64_eoi(); + + /* unmask TPR and clear any pending interrupts */ + ia64_setreg(_IA64_REG_CR_TPR, 0); + ia64_srlz_d(); + vector = ia64_get_ivr(); + while (vector != IA64_SPURIOUS_INT_VECTOR) { + ia64_eoi(); + vector = ia64_get_ivr(); + } + platform_kernel_launch_event(); + rnk = (relocate_new_kernel_t)&code_addr; + (*rnk)(image->head, image->start, ia64_boot_param, + GRANULEROUNDDOWN((unsigned long) pal_addr)); + BUG(); +} + +void machine_kexec(struct kimage *image) +{ + unw_init_running(ia64_machine_kexec, image); + for(;;); +} diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 6bedd97570ca..87c1c4f42872 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -82,6 +82,7 @@ #include <asm/system.h> #include <asm/sal.h> #include <asm/mca.h> +#include <asm/kexec.h> #include <asm/irq.h> #include <asm/hw_irq.h> @@ -1238,6 +1239,10 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, } else { /* Dump buffered message to console */ ia64_mlogbuf_finish(1); +#ifdef CONFIG_CRASH_DUMP + atomic_set(&kdump_in_progress, 1); + monarch_cpu = -1; +#endif } if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) == NOTIFY_STOP) diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index c4c10a0b99d9..a71df9ae0397 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -16,6 +16,7 @@ * 02/05/2001 S.Eranian fixed module support * 10/23/2001 S.Eranian updated pal_perf_mon_info bug fixes * 03/24/2004 Ashok Raj updated to work with CPU Hotplug + * 10/26/2006 Russ Anderson updated processor features to rev 2.2 spec */ #include <linux/types.h> #include <linux/errno.h> @@ -314,13 +315,20 @@ vm_info(char *page) "Protection Key Registers(PKR) : %d\n" "Implemented bits in PKR.key : %d\n" "Hash Tag ID : 0x%x\n" - "Size of RR.rid : %d\n", + "Size of RR.rid : %d\n" + "Max Purges : ", vm_info_1.pal_vm_info_1_s.phys_add_size, vm_info_2.pal_vm_info_2_s.impl_va_msb+1, vm_info_1.pal_vm_info_1_s.max_pkr+1, vm_info_1.pal_vm_info_1_s.key_size, vm_info_1.pal_vm_info_1_s.hash_tag_id, vm_info_2.pal_vm_info_2_s.rid_size); + if (vm_info_2.pal_vm_info_2_s.max_purges == PAL_MAX_PURGES) + p += sprintf(p, "unlimited\n"); + else + p += sprintf(p, "%d\n", + vm_info_2.pal_vm_info_2_s.max_purges ? + vm_info_2.pal_vm_info_2_s.max_purges : 1); } if (ia64_pal_mem_attrib(&attrib) == 0) { @@ -467,7 +475,11 @@ static const char *proc_features[]={ NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL, NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,NULL,NULL,NULL, NULL,NULL,NULL,NULL, - NULL,NULL,NULL,NULL,NULL, + "Unimplemented instruction address fault", + "INIT, PMI, and LINT pins", + "Simple unimplemented instr addresses", + "Variable P-state performance", + "Virtual machine features implemented", "XIP,XPSR,XFS implemented", "XR1-XR3 implemented", "Disable dynamic predicate prediction", @@ -475,7 +487,11 @@ static const char *proc_features[]={ "Disable dynamic data cache prefetch", "Disable dynamic inst cache prefetch", "Disable dynamic branch prediction", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "Disable P-states", + "Enable MCA on Data Poisoning", + "Enable vmsw instruction", + "Enable extern environmental notification", "Disable BINIT on processor time-out", "Disable dynamic power management (DPM)", "Disable coherency", diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index e2321536ee4c..dbb28164b19b 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -853,9 +853,8 @@ pfm_context_alloc(void) * allocate context descriptor * must be able to free with interrupts disabled */ - ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL); + ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL); if (ctx) { - memset(ctx, 0, sizeof(pfm_context_t)); DPRINT(("alloc ctx @%p\n", ctx)); } return ctx; diff --git a/arch/ia64/kernel/perfmon_montecito.h b/arch/ia64/kernel/perfmon_montecito.h index cd06ac6a686c..7f8da4c7ca67 100644 --- a/arch/ia64/kernel/perfmon_montecito.h +++ b/arch/ia64/kernel/perfmon_montecito.h @@ -45,16 +45,16 @@ static pfm_reg_desc_t pfm_mont_pmc_desc[PMU_MAX_PMCS]={ /* pmc29 */ { PFM_REG_NOTIMPL, }, /* pmc30 */ { PFM_REG_NOTIMPL, }, /* pmc31 */ { PFM_REG_NOTIMPL, }, -/* pmc32 */ { PFM_REG_CONFIG, 0, 0x30f01ffffffffff, 0x30f01ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc33 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc34 */ { PFM_REG_CONFIG, 0, 0xf01ffffffffff, 0xf01ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc35 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc32 */ { PFM_REG_CONFIG, 0, 0x30f01ffffffffffUL, 0x30f01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc33 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc34 */ { PFM_REG_CONFIG, 0, 0xf01ffffffffffUL, 0xf01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc35 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, /* pmc36 */ { PFM_REG_CONFIG, 0, 0xfffffff0, 0xf, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, /* pmc37 */ { PFM_REG_MONITOR, 4, 0x0, 0x3fff, NULL, pfm_mont_pmc_check, {RDEP_MONT_IEAR, 0, 0, 0}, {0, 0, 0, 0}}, /* pmc38 */ { PFM_REG_CONFIG, 0, 0xdb6, 0x2492, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, /* pmc39 */ { PFM_REG_MONITOR, 6, 0x0, 0xffcf, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}}, /* pmc40 */ { PFM_REG_MONITOR, 6, 0x2000000, 0xf01cf, NULL, pfm_mont_pmc_check, {RDEP_MONT_DEAR,0, 0, 0}, {0,0, 0, 0}}, -/* pmc41 */ { PFM_REG_CONFIG, 0, 0x00002078fefefefe, 0x1e00018181818, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc41 */ { PFM_REG_CONFIG, 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, /* pmc42 */ { PFM_REG_MONITOR, 6, 0x0, 0x7ff4f, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}}, { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */ }; @@ -185,7 +185,7 @@ pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cn DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, tmpval, ctx->ctx_fl_using_dbreg, is_loaded)); if (cnum == 41 && is_loaded - && (tmpval & 0x1e00000000000) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) { + && (tmpval & 0x1e00000000000UL) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) { DPRINT(("pmc[%d]=0x%lx has active pmc41 settings, clearing dbr\n", cnum, tmpval)); diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S new file mode 100644 index 000000000000..ae473e3f2a0d --- /dev/null +++ b/arch/ia64/kernel/relocate_kernel.S @@ -0,0 +1,334 @@ +/* + * arch/ia64/kernel/relocate_kernel.S + * + * Relocate kexec'able kernel and start it + * + * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. + * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com> + * Copyright (C) 2005 Intel Corp, Zou Nan hai <nanhai.zou@intel.com> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ +#include <asm/asmmacro.h> +#include <asm/kregs.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/mca_asm.h> + + /* Must be relocatable PIC code callable as a C function + */ +GLOBAL_ENTRY(relocate_new_kernel) + .prologue + alloc r31=ar.pfs,4,0,0,0 + .body +.reloc_entry: +{ + rsm psr.i| psr.ic + mov r2=ip +} + ;; +{ + flushrs // must be first insn in group + srlz.i +} + ;; + dep r2=0,r2,61,3 //to physical address + ;; + //first switch to physical mode + add r3=1f-.reloc_entry, r2 + movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC + mov ar.rsc=0 // put RSE in enforced lazy mode + ;; + add sp=(memory_stack_end - 16 - .reloc_entry),r2 + add r8=(register_stack - .reloc_entry),r2 + ;; + mov r18=ar.rnat + mov ar.bspstore=r8 + ;; + mov cr.ipsr=r16 + mov cr.iip=r3 + mov cr.ifs=r0 + srlz.i + ;; + mov ar.rnat=r18 + rfi + ;; +1: + //physical mode code begin + mov b6=in1 + dep r28=0,in2,61,3 //to physical address + + // purge all TC entries +#define O(member) IA64_CPUINFO_##member##_OFFSET + GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2 + ;; + addl r17=O(PTCE_STRIDE),r2 + addl r2=O(PTCE_BASE),r2 + ;; + ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base + ld4 r19=[r2],4 // r19=ptce_count[0] + ld4 r21=[r17],4 // r21=ptce_stride[0] + ;; + ld4 r20=[r2] // r20=ptce_count[1] + ld4 r22=[r17] // r22=ptce_stride[1] + mov r24=r0 + ;; + adds r20=-1,r20 + ;; +#undef O +2: + cmp.ltu p6,p7=r24,r19 +(p7) br.cond.dpnt.few 4f + mov ar.lc=r20 +3: + ptc.e r18 + ;; + add r18=r22,r18 + br.cloop.sptk.few 3b + ;; + add r18=r21,r18 + add r24=1,r24 + ;; + br.sptk.few 2b +4: + srlz.i + ;; + //purge TR entry for kernel text and data + movl r16=KERNEL_START + mov r18=KERNEL_TR_PAGE_SHIFT<<2 + ;; + ptr.i r16, r18 + ptr.d r16, r18 + ;; + srlz.i + ;; + + // purge TR entry for percpu data + movl r16=PERCPU_ADDR + mov r18=PERCPU_PAGE_SHIFT<<2 + ;; + ptr.d r16,r18 + ;; + srlz.d + ;; + + // purge TR entry for pal code + mov r16=in3 + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.i r16,r18 + ;; + srlz.i + ;; + + // purge TR entry for stack + mov r16=IA64_KR(CURRENT_STACK) + ;; + shl r16=r16,IA64_GRANULE_SHIFT + movl r19=PAGE_OFFSET + ;; + add r16=r19,r16 + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.d r16,r18 + ;; + srlz.i + ;; + + //copy segments + movl r16=PAGE_MASK + mov r30=in0 // in0 is page_list + br.sptk.few .dest_page + ;; +.loop: + ld8 r30=[in0], 8;; +.dest_page: + tbit.z p0, p6=r30, 0;; // 0x1 dest page +(p6) and r17=r30, r16 +(p6) br.cond.sptk.few .loop;; + + tbit.z p0, p6=r30, 1;; // 0x2 indirect page +(p6) and in0=r30, r16 +(p6) br.cond.sptk.few .loop;; + + tbit.z p0, p6=r30, 2;; // 0x4 end flag +(p6) br.cond.sptk.few .end_loop;; + + tbit.z p6, p0=r30, 3;; // 0x8 source page +(p6) br.cond.sptk.few .loop + + and r18=r30, r16 + + // simple copy page, may optimize later + movl r14=PAGE_SIZE/8 - 1;; + mov ar.lc=r14;; +1: + ld8 r14=[r18], 8;; + st8 [r17]=r14;; + fc.i r17 + add r17=8, r17 + br.ctop.sptk.few 1b + br.sptk.few .loop + ;; + +.end_loop: + sync.i // for fc.i + ;; + srlz.i + ;; + srlz.d + ;; + br.call.sptk.many b0=b6;; + +.align 32 +memory_stack: + .fill 8192, 1, 0 +memory_stack_end: +register_stack: + .fill 8192, 1, 0 +register_stack_end: +relocate_new_kernel_end: +END(relocate_new_kernel) + +.global relocate_new_kernel_size +relocate_new_kernel_size: + data8 relocate_new_kernel_end - relocate_new_kernel + +GLOBAL_ENTRY(ia64_dump_cpu_regs) + .prologue + alloc loc0=ar.pfs,1,2,0,0 + .body + mov ar.rsc=0 // put RSE in enforced lazy mode + add loc1=4*8, in0 // save r4 and r5 first + ;; +{ + flushrs // flush dirty regs to backing store + srlz.i +} + st8 [loc1]=r4, 8 + ;; + st8 [loc1]=r5, 8 + ;; + add loc1=32*8, in0 + mov r4=ar.rnat + ;; + st8 [in0]=r0, 8 // r0 + st8 [loc1]=r4, 8 // rnat + mov r5=pr + ;; + st8 [in0]=r1, 8 // r1 + st8 [loc1]=r5, 8 // pr + mov r4=b0 + ;; + st8 [in0]=r2, 8 // r2 + st8 [loc1]=r4, 8 // b0 + mov r5=b1; + ;; + st8 [in0]=r3, 24 // r3 + st8 [loc1]=r5, 8 // b1 + mov r4=b2 + ;; + st8 [in0]=r6, 8 // r6 + st8 [loc1]=r4, 8 // b2 + mov r5=b3 + ;; + st8 [in0]=r7, 8 // r7 + st8 [loc1]=r5, 8 // b3 + mov r4=b4 + ;; + st8 [in0]=r8, 8 // r8 + st8 [loc1]=r4, 8 // b4 + mov r5=b5 + ;; + st8 [in0]=r9, 8 // r9 + st8 [loc1]=r5, 8 // b5 + mov r4=b6 + ;; + st8 [in0]=r10, 8 // r10 + st8 [loc1]=r5, 8 // b6 + mov r5=b7 + ;; + st8 [in0]=r11, 8 // r11 + st8 [loc1]=r5, 8 // b7 + mov r4=b0 + ;; + st8 [in0]=r12, 8 // r12 + st8 [loc1]=r4, 8 // ip + mov r5=loc0 + ;; + st8 [in0]=r13, 8 // r13 + extr.u r5=r5, 0, 38 // ar.pfs.pfm + mov r4=r0 // user mask + ;; + st8 [in0]=r14, 8 // r14 + st8 [loc1]=r5, 8 // cfm + ;; + st8 [in0]=r15, 8 // r15 + st8 [loc1]=r4, 8 // user mask + mov r5=ar.rsc + ;; + st8 [in0]=r16, 8 // r16 + st8 [loc1]=r5, 8 // ar.rsc + mov r4=ar.bsp + ;; + st8 [in0]=r17, 8 // r17 + st8 [loc1]=r4, 8 // ar.bsp + mov r5=ar.bspstore + ;; + st8 [in0]=r18, 8 // r18 + st8 [loc1]=r5, 8 // ar.bspstore + mov r4=ar.rnat + ;; + st8 [in0]=r19, 8 // r19 + st8 [loc1]=r4, 8 // ar.rnat + mov r5=ar.ccv + ;; + st8 [in0]=r20, 8 // r20 + st8 [loc1]=r5, 8 // ar.ccv + mov r4=ar.unat + ;; + st8 [in0]=r21, 8 // r21 + st8 [loc1]=r4, 8 // ar.unat + mov r5 = ar.fpsr + ;; + st8 [in0]=r22, 8 // r22 + st8 [loc1]=r5, 8 // ar.fpsr + mov r4 = ar.unat + ;; + st8 [in0]=r23, 8 // r23 + st8 [loc1]=r4, 8 // unat + mov r5 = ar.fpsr + ;; + st8 [in0]=r24, 8 // r24 + st8 [loc1]=r5, 8 // fpsr + mov r4 = ar.pfs + ;; + st8 [in0]=r25, 8 // r25 + st8 [loc1]=r4, 8 // ar.pfs + mov r5 = ar.lc + ;; + st8 [in0]=r26, 8 // r26 + st8 [loc1]=r5, 8 // ar.lc + mov r4 = ar.ec + ;; + st8 [in0]=r27, 8 // r27 + st8 [loc1]=r4, 8 // ar.ec + mov r5 = ar.csd + ;; + st8 [in0]=r28, 8 // r28 + st8 [loc1]=r5, 8 // ar.csd + mov r4 = ar.ssd + ;; + st8 [in0]=r29, 8 // r29 + st8 [loc1]=r4, 8 // ar.ssd + ;; + st8 [in0]=r30, 8 // r30 + ;; + st8 [in0]=r31, 8 // r31 + mov ar.pfs=loc0 + ;; + br.ret.sptk.many rp +END(ia64_dump_cpu_regs) + + diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index d10404a41756..14e1200376a9 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -43,6 +43,8 @@ #include <linux/initrd.h> #include <linux/pm.h> #include <linux/cpufreq.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> #include <asm/ia32.h> #include <asm/machvec.h> @@ -252,6 +254,41 @@ reserve_memory (void) efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end); n++; +#ifdef CONFIG_KEXEC + /* crashkernel=size@offset specifies the size to reserve for a crash + * kernel.(offset is ingored for keep compatibility with other archs) + * By reserving this memory we guarantee that linux never set's it + * up as a DMA target.Useful for holding code to do something + * appropriate after a kernel panic. + */ + { + char *from = strstr(saved_command_line, "crashkernel="); + unsigned long base, size; + if (from) { + size = memparse(from + 12, &from); + if (size) { + sort_regions(rsvd_region, n); + base = kdump_find_rsvd_region(size, + rsvd_region, n); + if (base != ~0UL) { + rsvd_region[n].start = + (unsigned long)__va(base); + rsvd_region[n].end = + (unsigned long)__va(base + size); + n++; + crashk_res.start = base; + crashk_res.end = base + size - 1; + } + } + } + efi_memmap_res.start = ia64_boot_param->efi_memmap; + efi_memmap_res.end = efi_memmap_res.start + + ia64_boot_param->efi_memmap_size; + boot_param_res.start = __pa(ia64_boot_param); + boot_param_res.end = boot_param_res.start + + sizeof(*ia64_boot_param); + } +#endif /* end of memory marker */ rsvd_region[n].start = ~0UL; rsvd_region[n].end = ~0UL; @@ -263,6 +300,7 @@ reserve_memory (void) sort_regions(rsvd_region, num_rsvd_regions); } + /** * find_initrd - get initrd parameters from the boot parameter structure * diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 6ab95ceaf9d4..b1b9aa4364b9 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -30,6 +30,7 @@ #include <linux/delay.h> #include <linux/efi.h> #include <linux/bitops.h> +#include <linux/kexec.h> #include <asm/atomic.h> #include <asm/current.h> @@ -66,6 +67,7 @@ static volatile struct call_data_struct *call_data; #define IPI_CALL_FUNC 0 #define IPI_CPU_STOP 1 +#define IPI_KDUMP_CPU_STOP 3 /* This needs to be cacheline aligned because it is written to by *other* CPUs. */ static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned; @@ -155,7 +157,11 @@ handle_IPI (int irq, void *dev_id) case IPI_CPU_STOP: stop_this_cpu(); break; - +#ifdef CONFIG_CRASH_DUMP + case IPI_KDUMP_CPU_STOP: + unw_init_running(kdump_cpu_freeze, NULL); + break; +#endif default: printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which); break; @@ -213,6 +219,26 @@ send_IPI_self (int op) send_IPI_single(smp_processor_id(), op); } +#ifdef CONFIG_CRASH_DUMP +void +kdump_smp_send_stop() +{ + send_IPI_allbutself(IPI_KDUMP_CPU_STOP); +} + +void +kdump_smp_send_init() +{ + unsigned int cpu, self_cpu; + self_cpu = smp_processor_id(); + for_each_online_cpu(cpu) { + if (cpu != self_cpu) { + if(kdump_status[cpu] == 0) + platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0); + } + } +} +#endif /* * Called with preeemption disabled. */ diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S index 19674ca2acfc..1f86aeb2c948 100644 --- a/arch/ia64/lib/ip_fast_csum.S +++ b/arch/ia64/lib/ip_fast_csum.S @@ -8,8 +8,8 @@ * in0: address of buffer to checksum (char *) * in1: length of the buffer (int) * - * Copyright (C) 2002 Intel Corp. - * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com> + * Copyright (C) 2002, 2006 Intel Corp. + * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com> */ #include <asm/asmmacro.h> @@ -25,6 +25,9 @@ #define in0 r32 #define in1 r33 +#define in2 r34 +#define in3 r35 +#define in4 r36 #define ret0 r8 GLOBAL_ENTRY(ip_fast_csum) @@ -65,8 +68,9 @@ GLOBAL_ENTRY(ip_fast_csum) zxt2 r20=r20 ;; add r20=ret0,r20 + mov r9=0xffff ;; - andcm ret0=-1,r20 + andcm ret0=r9,r20 .restore sp // reset frame state br.ret.sptk.many b0 ;; @@ -88,3 +92,51 @@ GLOBAL_ENTRY(ip_fast_csum) mov b0=r34 br.ret.sptk.many b0 END(ip_fast_csum) + +GLOBAL_ENTRY(csum_ipv6_magic) + ld4 r20=[in0],4 + ld4 r21=[in1],4 + dep r15=in3,in2,32,16 + ;; + ld4 r22=[in0],4 + ld4 r23=[in1],4 + mux1 r15=r15,@rev + ;; + ld4 r24=[in0],4 + ld4 r25=[in1],4 + shr.u r15=r15,16 + add r16=r20,r21 + add r17=r22,r23 + ;; + ld4 r26=[in0],4 + ld4 r27=[in1],4 + add r18=r24,r25 + add r8=r16,r17 + ;; + add r19=r26,r27 + add r8=r8,r18 + ;; + add r8=r8,r19 + add r15=r15,in4 + ;; + add r8=r8,r15 + ;; + shr.u r10=r8,32 // now fold sum into short + zxt4 r11=r8 + ;; + add r8=r10,r11 + ;; + shr.u r10=r8,16 // yeah, keep it rolling + zxt2 r11=r8 + ;; + add r8=r10,r11 + ;; + shr.u r10=r8,16 // three times lucky + zxt2 r11=r8 + ;; + add r8=r10,r11 + mov r9=0xffff + ;; + andcm r8=r9,r8 + br.ret.sptk.many b0 +END(csum_ipv6_magic) diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index eb92cef9cd0d..474d179966dc 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -125,11 +125,10 @@ alloc_pci_controller (int seg) { struct pci_controller *controller; - controller = kmalloc(sizeof(*controller), GFP_KERNEL); + controller = kzalloc(sizeof(*controller), GFP_KERNEL); if (!controller) return NULL; - memset(controller, 0, sizeof(*controller)); controller->segment = seg; controller->node = -1; return controller; diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 0b49459a878a..8c5bee01eaa2 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -117,7 +117,10 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, nasid_t nasid, int slice) { int vector; + int cpuid; +#ifdef CONFIG_SMP int cpuphys; +#endif int64_t bridge; int local_widget, status; nasid_t local_nasid; @@ -146,7 +149,6 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, vector = sn_irq_info->irq_irq; /* Free the old PROM new_irq_info structure */ sn_intr_free(local_nasid, local_widget, new_irq_info); - /* Update kernels new_irq_info with new target info */ unregister_intr_pda(new_irq_info); /* allocate a new PROM new_irq_info struct */ @@ -160,8 +162,10 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, return NULL; } - cpuphys = nasid_slice_to_cpuid(nasid, slice); - new_irq_info->irq_cpuid = cpuphys; + /* Update kernels new_irq_info with new target info */ + cpuid = nasid_slice_to_cpuid(new_irq_info->irq_nasid, + new_irq_info->irq_slice); + new_irq_info->irq_cpuid = cpuid; register_intr_pda(new_irq_info); pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type]; @@ -180,6 +184,7 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, call_rcu(&sn_irq_info->rcu, sn_irq_info_free); #ifdef CONFIG_SMP + cpuphys = cpu_physical_id(cpuid); set_irq_affinity_info((vector & 0xff), cpuphys, 0); #endif @@ -299,6 +304,9 @@ void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info) nasid_t nasid = sn_irq_info->irq_nasid; int slice = sn_irq_info->irq_slice; int cpu = nasid_slice_to_cpuid(nasid, slice); +#ifdef CONFIG_SMP + int cpuphys; +#endif pci_dev_get(pci_dev); sn_irq_info->irq_cpuid = cpu; @@ -311,6 +319,10 @@ void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info) spin_unlock(&sn_irq_info_lock); register_intr_pda(sn_irq_info); +#ifdef CONFIG_SMP + cpuphys = cpu_physical_id(cpu); + set_irq_affinity_info(sn_irq_info->irq_irq, cpuphys, 0); +#endif } void sn_irq_unfixup(struct pci_dev *pci_dev) diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c index 6ffd1f850d41..b3a435fd70fb 100644 --- a/arch/ia64/sn/kernel/msi_sn.c +++ b/arch/ia64/sn/kernel/msi_sn.c @@ -136,10 +136,6 @@ int sn_setup_msi_irq(unsigned int irq, struct pci_dev *pdev) */ msg.data = 0x100 + irq; -#ifdef CONFIG_SMP - set_irq_affinity_info(irq, sn_irq_info->irq_cpuid, 0); -#endif - write_msi_msg(irq, &msg); set_irq_chip_and_handler(irq, &sn_msi_chip, handle_edge_irq); diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 1d009f93244d..a934ad069425 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -769,5 +769,13 @@ int sn_prom_feature_available(int id) return 0; return test_bit(id, sn_prom_features); } + +void +sn_kernel_launch_event(void) +{ + /* ignore status until we understand possible failure, if any*/ + if (ia64_sn_kernel_launch_event()) + printk(KERN_ERR "KEXEC is not supported in this PROM, Please update the PROM.\n"); +} EXPORT_SYMBOL(sn_prom_feature_available); diff --git a/include/asm-ia64/checksum.h b/include/asm-ia64/checksum.h index bd40f4756ce1..2b78582cbd61 100644 --- a/include/asm-ia64/checksum.h +++ b/include/asm-ia64/checksum.h @@ -70,4 +70,10 @@ static inline __sum16 csum_fold(__wsum csum) return (__force __sum16)~sum; } +#define _HAVE_ARCH_IPV6_CSUM 1 +struct in6_addr; +extern unsigned short int csum_ipv6_magic(struct in6_addr *saddr, + struct in6_addr *daddr, __u32 len, unsigned short proto, + unsigned int csum); + #endif /* _ASM_IA64_CHECKSUM_H */ diff --git a/include/asm-ia64/kexec.h b/include/asm-ia64/kexec.h new file mode 100644 index 000000000000..01c36b004747 --- /dev/null +++ b/include/asm-ia64/kexec.h @@ -0,0 +1,47 @@ +#ifndef _ASM_IA64_KEXEC_H +#define _ASM_IA64_KEXEC_H + + +/* Maximum physical address we can use pages from */ +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) +/* Maximum address we can reach in physical address mode */ +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) +/* Maximum address we can use for the control code buffer */ +#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE + +#define KEXEC_CONTROL_CODE_SIZE (8192 + 8192 + 4096) + +/* The native architecture */ +#define KEXEC_ARCH KEXEC_ARCH_IA_64 + +#define MAX_NOTE_BYTES 1024 + +#define kexec_flush_icache_page(page) do { \ + unsigned long page_addr = (unsigned long)page_address(page); \ + flush_icache_range(page_addr, page_addr + PAGE_SIZE); \ + } while(0) + +extern struct kimage *ia64_kimage; +DECLARE_PER_CPU(u64, ia64_mca_pal_base); +const extern unsigned int relocate_new_kernel_size; +extern void relocate_new_kernel(unsigned long, unsigned long, + struct ia64_boot_param *, unsigned long); +static inline void +crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs) +{ +} +extern struct resource efi_memmap_res; +extern struct resource boot_param_res; +extern void kdump_smp_send_stop(void); +extern void kdump_smp_send_init(void); +extern void kexec_disable_iosapic(void); +extern void crash_save_this_cpu(void); +struct rsvd_region; +extern unsigned long kdump_find_rsvd_region(unsigned long size, + struct rsvd_region *rsvd_regions, int n); +extern void kdump_cpu_freeze(struct unw_frame_info *info, void *arg); +extern int kdump_status[]; +extern atomic_t kdump_cpu_freezed; +extern atomic_t kdump_in_progress; + +#endif /* _ASM_IA64_KEXEC_H */ diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h index 8f784f8e45b0..a3891eb3f217 100644 --- a/include/asm-ia64/machvec.h +++ b/include/asm-ia64/machvec.h @@ -37,6 +37,7 @@ typedef int ia64_mv_pci_legacy_write_t (struct pci_bus *, u16 port, u32 val, u8 size); typedef void ia64_mv_migrate_t(struct task_struct * task); typedef void ia64_mv_pci_fixup_bus_t (struct pci_bus *); +typedef void ia64_mv_kernel_launch_event_t(void); /* DMA-mapping interface: */ typedef void ia64_mv_dma_init (void); @@ -218,6 +219,7 @@ struct ia64_machine_vector { ia64_mv_setup_msi_irq_t *setup_msi_irq; ia64_mv_teardown_msi_irq_t *teardown_msi_irq; ia64_mv_pci_fixup_bus_t *pci_fixup_bus; + ia64_mv_kernel_launch_event_t *kernel_launch_event; } __attribute__((__aligned__(16))); /* align attrib? see above comment */ #define MACHVEC_INIT(name) \ @@ -318,6 +320,9 @@ extern ia64_mv_dma_supported swiotlb_dma_supported; #ifndef platform_tlb_migrate_finish # define platform_tlb_migrate_finish machvec_noop_mm #endif +#ifndef platform_kernel_launch_event +# define platform_kernel_launch_event machvec_noop +#endif #ifndef platform_dma_init # define platform_dma_init swiotlb_init #endif diff --git a/include/asm-ia64/machvec_sn2.h b/include/asm-ia64/machvec_sn2.h index 83325f6db03e..eaa2fce0fecd 100644 --- a/include/asm-ia64/machvec_sn2.h +++ b/include/asm-ia64/machvec_sn2.h @@ -67,6 +67,7 @@ extern ia64_mv_dma_sync_sg_for_device sn_dma_sync_sg_for_device; extern ia64_mv_dma_mapping_error sn_dma_mapping_error; extern ia64_mv_dma_supported sn_dma_supported; extern ia64_mv_migrate_t sn_migrate; +extern ia64_mv_kernel_launch_event_t sn_kernel_launch_event; extern ia64_mv_setup_msi_irq_t sn_setup_msi_irq; extern ia64_mv_teardown_msi_irq_t sn_teardown_msi_irq; extern ia64_mv_pci_fixup_bus_t sn_pci_fixup_bus; @@ -121,6 +122,7 @@ extern ia64_mv_pci_fixup_bus_t sn_pci_fixup_bus; #define platform_dma_mapping_error sn_dma_mapping_error #define platform_dma_supported sn_dma_supported #define platform_migrate sn_migrate +#define platform_kernel_launch_event sn_kernel_launch_event #ifdef CONFIG_PCI_MSI #define platform_setup_msi_irq sn_setup_msi_irq #define platform_teardown_msi_irq sn_teardown_msi_irq diff --git a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h index c3b1f862e6e7..c8df75901083 100644 --- a/include/asm-ia64/meminit.h +++ b/include/asm-ia64/meminit.h @@ -15,11 +15,12 @@ * - initrd (optional) * - command line string * - kernel code & data + * - crash dumping code reserved region * - Kernel memory map built from EFI memory map * * More could be added if necessary */ -#define IA64_MAX_RSVD_REGIONS 6 +#define IA64_MAX_RSVD_REGIONS 7 struct rsvd_region { unsigned long start; /* virtual address of beginning of element */ diff --git a/include/asm-ia64/page.h b/include/asm-ia64/page.h index 947cb72b520e..485759ba9e36 100644 --- a/include/asm-ia64/page.h +++ b/include/asm-ia64/page.h @@ -101,7 +101,7 @@ do { \ #ifdef CONFIG_VIRTUAL_MEM_MAP extern int ia64_pfn_valid (unsigned long pfn); -#elif defined(CONFIG_FLATMEM) +#else # define ia64_pfn_valid(pfn) 1 #endif @@ -110,12 +110,11 @@ extern struct page *vmem_map; #ifdef CONFIG_DISCONTIGMEM # define page_to_pfn(page) ((unsigned long) (page - vmem_map)) # define pfn_to_page(pfn) (vmem_map + (pfn)) +#else +# include <asm-generic/memory_model.h> #endif -#endif - -#if defined(CONFIG_FLATMEM) || defined(CONFIG_SPARSEMEM) -/* FLATMEM always configures mem_map (mem_map = vmem_map if necessary) */ -#include <asm-generic/memory_model.h> +#else +# include <asm-generic/memory_model.h> #endif #ifdef CONFIG_FLATMEM diff --git a/include/asm-ia64/pal.h b/include/asm-ia64/pal.h index 4283ddcc25fb..bc768153f3c9 100644 --- a/include/asm-ia64/pal.h +++ b/include/asm-ia64/pal.h @@ -20,6 +20,8 @@ * 00/05/24 eranian Updated to latest PAL spec, fix structures bugs, added * 00/05/25 eranian Support for stack calls, and static physical calls * 00/06/18 eranian Support for stacked physical calls + * 06/10/26 rja Support for Intel Itanium Architecture Software Developer's + * Manual Rev 2.2 (Jan 2006) */ /* @@ -69,6 +71,8 @@ #define PAL_PREFETCH_VISIBILITY 41 /* Make Processor Prefetches Visible */ #define PAL_LOGICAL_TO_PHYSICAL 42 /* returns information on logical to physical processor mapping */ #define PAL_CACHE_SHARED_INFO 43 /* returns information on caches shared by logical processor */ +#define PAL_GET_HW_POLICY 48 /* Get current hardware resource sharing policy */ +#define PAL_SET_HW_POLICY 49 /* Set current hardware resource sharing policy */ #define PAL_COPY_PAL 256 /* relocate PAL procedures and PAL PMI */ #define PAL_HALT_INFO 257 /* return the low power capabilities of processor */ @@ -80,6 +84,11 @@ #define PAL_SET_PSTATE 263 /* set the P-state */ #define PAL_BRAND_INFO 274 /* Processor branding information */ +#define PAL_GET_PSTATE_TYPE_LASTSET 0 +#define PAL_GET_PSTATE_TYPE_AVGANDRESET 1 +#define PAL_GET_PSTATE_TYPE_AVGNORESET 2 +#define PAL_GET_PSTATE_TYPE_INSTANT 3 + #ifndef __ASSEMBLY__ #include <linux/types.h> @@ -102,6 +111,7 @@ typedef s64 pal_status_t; * cache without sideeffects * and "restrict" was 1 */ +#define PAL_STATUS_REQUIRES_MEMORY (-9) /* Call requires PAL memory buffer */ /* Processor cache level in the heirarchy */ typedef u64 pal_cache_level_t; @@ -456,7 +466,9 @@ typedef struct pal_process_state_info_s { * by the processor */ - reserved2 : 11, + se : 1, /* Shared error. MCA in a + shared structure */ + reserved2 : 10, cc : 1, /* Cache check */ tc : 1, /* TLB check */ bc : 1, /* Bus check */ @@ -487,10 +499,12 @@ typedef struct pal_cache_check_info_s { * error occurred */ wiv : 1, /* Way field valid */ - reserved2 : 10, + reserved2 : 1, + dp : 1, /* Data poisoned on MBE */ + reserved3 : 8, index : 20, /* Cache line index */ - reserved3 : 2, + reserved4 : 2, is : 1, /* instruction set (1 == ia32) */ iv : 1, /* instruction set field valid */ @@ -557,7 +571,7 @@ typedef struct pal_bus_check_info_s { type : 8, /* Bus xaction type*/ sev : 5, /* Bus error severity*/ hier : 2, /* Bus hierarchy level */ - reserved1 : 1, + dp : 1, /* Data poisoned on MBE */ bsi : 8, /* Bus error status * info */ @@ -834,7 +848,9 @@ typedef union pal_bus_features_u { u64 pbf_req_bus_parking : 1; u64 pbf_bus_lock_mask : 1; u64 pbf_enable_half_xfer_rate : 1; - u64 pbf_reserved2 : 22; + u64 pbf_reserved2 : 20; + u64 pbf_enable_shared_line_replace : 1; + u64 pbf_enable_exclusive_line_replace : 1; u64 pbf_disable_xaction_queueing : 1; u64 pbf_disable_resp_err_check : 1; u64 pbf_disable_berr_check : 1; @@ -1077,6 +1093,24 @@ ia64_pal_freq_ratios (struct pal_freq_ratio *proc_ratio, struct pal_freq_ratio * return iprv.status; } +/* + * Get the current hardware resource sharing policy of the processor + */ +static inline s64 +ia64_pal_get_hw_policy (u64 proc_num, u64 *cur_policy, u64 *num_impacted, + u64 *la) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_GET_HW_POLICY, proc_num, 0, 0); + if (cur_policy) + *cur_policy = iprv.v0; + if (num_impacted) + *num_impacted = iprv.v1; + if (la) + *la = iprv.v2; + return iprv.status; +} + /* Make the processor enter HALT or one of the implementation dependent low * power states where prefetching and execution are suspended and cache and * TLB coherency is not maintained. @@ -1112,10 +1146,10 @@ ia64_pal_halt_info (pal_power_mgmt_info_u_t *power_buf) /* Get the current P-state information */ static inline s64 -ia64_pal_get_pstate (u64 *pstate_index) +ia64_pal_get_pstate (u64 *pstate_index, unsigned long type) { struct ia64_pal_retval iprv; - PAL_CALL_STK(iprv, PAL_GET_PSTATE, 0, 0, 0); + PAL_CALL_STK(iprv, PAL_GET_PSTATE, type, 0, 0); *pstate_index = iprv.v0; return iprv.status; } @@ -1401,6 +1435,17 @@ ia64_pal_rse_info (u64 *num_phys_stacked, pal_hints_u_t *hints) return iprv.status; } +/* + * Set the current hardware resource sharing policy of the processor + */ +static inline s64 +ia64_pal_set_hw_policy (u64 policy) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_SET_HW_POLICY, policy, 0, 0); + return iprv.status; +} + /* Cause the processor to enter SHUTDOWN state, where prefetching and execution are * suspended, but cause cache and TLB coherency to be maintained. * This is usually called in IA-32 mode. @@ -1524,12 +1569,15 @@ typedef union pal_vm_info_1_u { } pal_vm_info_1_s; } pal_vm_info_1_u_t; +#define PAL_MAX_PURGES 0xFFFF /* all ones is means unlimited */ + typedef union pal_vm_info_2_u { u64 pvi2_val; struct { u64 impl_va_msb : 8, rid_size : 8, - reserved : 48; + max_purges : 16, + reserved : 32; } pal_vm_info_2_s; } pal_vm_info_2_u_t; diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h index be5d83ad7cb1..2c4004eb5a68 100644 --- a/include/asm-ia64/sn/sn_sal.h +++ b/include/asm-ia64/sn/sn_sal.h @@ -88,6 +88,8 @@ #define SN_SAL_INJECT_ERROR 0x02000067 #define SN_SAL_SET_CPU_NUMBER 0x02000068 +#define SN_SAL_KERNEL_LAUNCH_EVENT 0x02000069 + /* * Service-specific constants */ @@ -1155,4 +1157,11 @@ ia64_sn_set_cpu_number(int cpu) SAL_CALL_NOLOCK(rv, SN_SAL_SET_CPU_NUMBER, cpu, 0, 0, 0, 0, 0, 0); return rv.status; } +static inline int +ia64_sn_kernel_launch_event(void) +{ + struct ia64_sal_retval rv; + SAL_CALL_NOLOCK(rv, SN_SAL_KERNEL_LAUNCH_EVENT, 0, 0, 0, 0, 0, 0, 0); + return rv.status; +} #endif /* _ASM_IA64_SN_SN_SAL_H */ diff --git a/include/linux/kexec.h b/include/linux/kexec.h index e3abcec6c51c..d02425cdd801 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -109,6 +109,10 @@ void crash_save_cpu(struct pt_regs *regs, int cpu); extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; +#ifndef kexec_flush_icache_page +#define kexec_flush_icache_page(page) +#endif + #define KEXEC_ON_CRASH 0x00000001 #define KEXEC_ARCH_MASK 0xffff0000 @@ -134,6 +138,7 @@ extern struct resource crashk_res; typedef u32 note_buf_t[MAX_NOTE_BYTES/4]; extern note_buf_t *crash_notes; + #else /* !CONFIG_KEXEC */ struct pt_regs; struct task_struct; diff --git a/kernel/kexec.c b/kernel/kexec.c index afbbbe981be2..2a59c8a01ae0 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -852,6 +852,7 @@ static int kimage_load_crash_segment(struct kimage *image, memset(ptr + uchunk, 0, mchunk - uchunk); } result = copy_from_user(ptr, buf, uchunk); + kexec_flush_icache_page(page); kunmap(page); if (result) { result = (result < 0) ? result : -EIO; |