diff options
Diffstat (limited to 'arch/ia64')
61 files changed, 2605 insertions, 939 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 70f7eb9fed35..fcacfe291b9b 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -34,6 +34,14 @@ config RWSEM_XCHGADD_ALGORITHM bool default y +config ARCH_HAS_ILOG2_U32 + bool + default n + +config ARCH_HAS_ILOG2_U64 + bool + default n + config GENERIC_FIND_NEXT_BIT bool default y @@ -341,6 +349,7 @@ config NUMA bool "NUMA support" depends on !IA64_HP_SIM && !FLATMEM default y if IA64_SGI_SN2 + select ACPI_NUMA if ACPI help Say Y to compile the kernel to support NUMA (Non-Uniform Memory Access). This option is for configuring high-end multiprocessor @@ -433,6 +442,29 @@ config IA64_ESI source "drivers/sn/Kconfig" +config KEXEC + bool "kexec system call (EXPERIMENTAL)" + depends on EXPERIMENTAL && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU) + help + kexec is a system call that implements the ability to shut down your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shut down, so do not be surprised if this code does not + initially work for you. It may help to enable device hotplugging + support. As of this writing the exact hardware interface is + strongly in flux, so no good recommendation can be made. + +config CRASH_DUMP + bool "kernel crash dumps (EXPERIMENTAL)" + depends on EXPERIMENTAL && IA64_MCA_RECOVERY && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU) + help + Generate crash dump after being started by kexec. 
+ source "drivers/firmware/Kconfig" source "fs/Kconfig.binfmt" @@ -483,6 +515,15 @@ source "net/Kconfig" source "drivers/Kconfig" +config MSPEC + tristate "Memory special operations driver" + depends on IA64 + select IA64_UNCACHED_ALLOCATOR + help + If you have an ia64 and you want to enable memory special + operations support (formerly known as fetchop), say Y here, + otherwise say N. + source "fs/Kconfig" source "lib/Kconfig" diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index db8e1fcfa047..ce49fe3a3b56 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -75,7 +75,7 @@ ** If a device prefetches beyond the end of a valid pdir entry, it will cause ** a hard failure, ie. MCA. Version 3.0 and later of the zx1 LBA should ** disconnect on 4k boundaries and prevent such issues. If the device is -** particularly agressive, this option will keep the entire pdir valid such +** particularly aggressive, this option will keep the entire pdir valid such ** that prefetching will hit a valid address. This could severely impact ** error containment, and is therefore off by default. The page that is ** used for spill-over is poisoned, so that should help debugging somewhat. @@ -258,10 +258,10 @@ static u64 prefetch_spill_page; /* ** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up -** (or rather not merge) DMA's into managable chunks. +** (or rather not merge) DMAs into manageable chunks. ** On parisc, this is more of the software/tuning constraint -** rather than the HW. I/O MMU allocation alogorithms can be -** faster with smaller size is (to some degree). +** rather than the HW. I/O MMU allocation algorithms can be +** faster with smaller sizes (to some degree). */ #define DMA_CHUNK_SIZE (BITS_PER_LONG*iovp_size) @@ -1672,15 +1672,13 @@ ioc_sac_init(struct ioc *ioc) * SAC (single address cycle) addressable, so allocate a * pseudo-device to enforce that. 
*/ - sac = kmalloc(sizeof(*sac), GFP_KERNEL); + sac = kzalloc(sizeof(*sac), GFP_KERNEL); if (!sac) panic(PFX "Couldn't allocate struct pci_dev"); - memset(sac, 0, sizeof(*sac)); - controller = kmalloc(sizeof(*controller), GFP_KERNEL); + controller = kzalloc(sizeof(*controller), GFP_KERNEL); if (!controller) panic(PFX "Couldn't allocate struct pci_controller"); - memset(controller, 0, sizeof(*controller)); controller->iommu = ioc; sac->sysdata = controller; @@ -1737,12 +1735,10 @@ ioc_init(u64 hpa, void *handle) struct ioc *ioc; struct ioc_iommu *info; - ioc = kmalloc(sizeof(*ioc), GFP_KERNEL); + ioc = kzalloc(sizeof(*ioc), GFP_KERNEL); if (!ioc) return NULL; - memset(ioc, 0, sizeof(*ioc)); - ioc->next = ioc_list; ioc_list = ioc; diff --git a/arch/ia64/hp/sim/Kconfig b/arch/ia64/hp/sim/Kconfig index 18ccb1266e18..f92306bbedb8 100644 --- a/arch/ia64/hp/sim/Kconfig +++ b/arch/ia64/hp/sim/Kconfig @@ -13,8 +13,8 @@ config HP_SIMSERIAL_CONSOLE depends on HP_SIMSERIAL config HP_SIMSCSI - tristate "Simulated SCSI disk" - depends on SCSI + bool "Simulated SCSI disk" + depends on SCSI=y endmenu diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c index 8145547bb52d..c2f58ff364e7 100644 --- a/arch/ia64/hp/sim/hpsim_irq.c +++ b/arch/ia64/hp/sim/hpsim_irq.c @@ -27,7 +27,7 @@ hpsim_set_affinity_noop (unsigned int a, cpumask_t b) } static struct hw_interrupt_type irq_type_hp_sim = { - .typename = "hpsim", + .name = "hpsim", .startup = hpsim_irq_startup, .shutdown = hpsim_irq_noop, .enable = hpsim_irq_noop, diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index caab986af70c..324ea7565e2c 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -209,7 +209,7 @@ static void do_serial_bh(void) } #endif -static void do_softint(void *private_) +static void do_softint(struct work_struct *private_) { printk(KERN_ERR "simserial: do_softint called\n"); } @@ -488,7 +488,7 @@ static int rs_ioctl(struct tty_struct *tty, struct 
file * file, #define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK)) -static void rs_set_termios(struct tty_struct *tty, struct termios *old_termios) +static void rs_set_termios(struct tty_struct *tty, struct ktermios *old_termios) { unsigned int cflag = tty->termios->c_cflag; @@ -684,12 +684,11 @@ static int get_async_struct(int line, struct async_struct **ret_info) *ret_info = sstate->info; return 0; } - info = kmalloc(sizeof(struct async_struct), GFP_KERNEL); + info = kzalloc(sizeof(struct async_struct), GFP_KERNEL); if (!info) { sstate->count--; return -ENOMEM; } - memset(info, 0, sizeof(struct async_struct)); init_waitqueue_head(&info->open_wait); init_waitqueue_head(&info->close_wait); init_waitqueue_head(&info->delta_msr_wait); @@ -698,7 +697,7 @@ static int get_async_struct(int line, struct async_struct **ret_info) info->flags = sstate->flags; info->xmit_fifo_size = sstate->xmit_fifo_size; info->line = line; - INIT_WORK(&info->work, do_softint, info); + INIT_WORK(&info->work, do_softint); info->state = sstate; if (sstate->info) { kfree(info); diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c index daa6b91bc921..578737ec7629 100644 --- a/arch/ia64/ia32/binfmt_elf32.c +++ b/arch/ia64/ia32/binfmt_elf32.c @@ -91,7 +91,7 @@ ia64_elf32_init (struct pt_regs *regs) * it with privilege level 3 because the IVE uses non-privileged accesses to these * tables. IA-32 segmentation is used to protect against IA-32 accesses to them. 
*/ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (vma) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = current->mm; @@ -117,7 +117,7 @@ ia64_elf32_init (struct pt_regs *regs) * code is locked in specific gate page, which is pointed by pretcode * when setup_frame_ia32 */ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (vma) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = current->mm; @@ -142,7 +142,7 @@ ia64_elf32_init (struct pt_regs *regs) * Install LDT as anonymous memory. This gives us all-zero segment descriptors * until a task modifies them via modify_ldt(). */ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (vma) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = current->mm; @@ -214,7 +214,7 @@ ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack) bprm->loader += stack_base; bprm->exec += stack_base; - mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!mpnt) return -ENOMEM; diff --git a/arch/ia64/ia32/ia32_support.c b/arch/ia64/ia32/ia32_support.c index c187743965a0..6af400a12ca1 100644 --- a/arch/ia64/ia32/ia32_support.c +++ b/arch/ia64/ia32/ia32_support.c @@ -249,7 +249,7 @@ ia32_init (void) #if PAGE_SHIFT > IA32_PAGE_SHIFT { - extern kmem_cache_t *partial_page_cachep; + extern struct kmem_cache *partial_page_cachep; partial_page_cachep = kmem_cache_create("partial_page_cache", sizeof(struct partial_page), 0, 0, diff --git a/arch/ia64/ia32/ia32priv.h b/arch/ia64/ia32/ia32priv.h index 703a67c934f8..cfa0bc0026b5 100644 --- a/arch/ia64/ia32/ia32priv.h +++ b/arch/ia64/ia32/ia32priv.h @@ -330,8 +330,6 @@ struct old_linux32_dirent { void ia64_elf32_init(struct pt_regs *regs); #define ELF_PLAT_INIT(_r, load_addr) ia64_elf32_init(_r) -#define elf_addr_t u32 - /* This macro yields a bitmask that programs can 
use to figure out what instruction set this CPU supports. */ #define ELF_HWCAP 0 diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 9d6a3f210148..957681c39ad9 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -235,7 +235,7 @@ mmap_subpage (struct file *file, unsigned long start, unsigned long end, int pro if (!(flags & MAP_ANONYMOUS)) { /* read the file contents */ - inode = file->f_dentry->d_inode; + inode = file->f_path.dentry->d_inode; if (!inode->i_fop || !file->f_op->read || ((*file->f_op->read)(file, (char __user *) start, end - start, &off) < 0)) { @@ -254,7 +254,7 @@ mmap_subpage (struct file *file, unsigned long start, unsigned long end, int pro } /* SLAB cache for partial_page structures */ -kmem_cache_t *partial_page_cachep; +struct kmem_cache *partial_page_cachep; /* * init partial_page_list. @@ -837,7 +837,7 @@ emulate_mmap (struct file *file, unsigned long start, unsigned long len, int pro if (!is_congruent) { /* read the file contents */ - inode = file->f_dentry->d_inode; + inode = file->f_path.dentry->d_inode; if (!inode->i_fop || !file->f_op->read || ((*file->f_op->read)(file, (char __user *) pstart, pend - pstart, &poff) < 0)) diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index cfa099b04cda..098ee605bf5e 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -28,6 +28,8 @@ obj-$(CONFIG_IA64_CYCLONE) += cyclone.o obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_PCI_MSI) += msi_ia64.o diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c index 86faf221a070..088f130197ae 100644 --- a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c +++ 
b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c @@ -68,7 +68,8 @@ processor_get_pstate ( dprintk("processor_get_pstate\n"); - retval = ia64_pal_get_pstate(&pstate_index); + retval = ia64_pal_get_pstate(&pstate_index, + PAL_GET_PSTATE_TYPE_INSTANT); *value = (u32) pstate_index; if (retval) @@ -91,7 +92,7 @@ extract_clock ( dprintk("extract_clock\n"); for (i = 0; i < data->acpi_data.state_count; i++) { - if (value >= data->acpi_data.states[i].control) + if (value == data->acpi_data.states[i].status) return data->acpi_data.states[i].core_frequency; } return data->acpi_data.states[i-1].core_frequency; @@ -117,11 +118,7 @@ processor_get_freq ( goto migrate_end; } - /* - * processor_get_pstate gets the average frequency since the - * last get. So, do two PAL_get_freq()... - */ - ret = processor_get_pstate(&value); + /* processor_get_pstate gets the instantaneous frequency */ ret = processor_get_pstate(&value); if (ret) { diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c new file mode 100644 index 000000000000..bc2f64d72244 --- /dev/null +++ b/arch/ia64/kernel/crash.c @@ -0,0 +1,223 @@ +/* + * arch/ia64/kernel/crash.c + * + * Architecture specific (ia64) functions for kexec based crash dumps. + * + * Created by: Khalid Aziz <khalid.aziz@hp.com> + * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. 
+ * Copyright (C) 2005 Intel Corp Zou Nan hai <nanhai.zou@intel.com> + * + */ +#include <linux/smp.h> +#include <linux/delay.h> +#include <linux/crash_dump.h> +#include <linux/bootmem.h> +#include <linux/kexec.h> +#include <linux/elfcore.h> +#include <linux/sysctl.h> +#include <linux/init.h> + +#include <asm/kdebug.h> +#include <asm/mca.h> + +int kdump_status[NR_CPUS]; +atomic_t kdump_cpu_freezed; +atomic_t kdump_in_progress; +int kdump_on_init = 1; + +static inline Elf64_Word +*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data, + size_t data_len) +{ + struct elf_note *note = (struct elf_note *)buf; + note->n_namesz = strlen(name) + 1; + note->n_descsz = data_len; + note->n_type = type; + buf += (sizeof(*note) + 3)/4; + memcpy(buf, name, note->n_namesz); + buf += (note->n_namesz + 3)/4; + memcpy(buf, data, data_len); + buf += (data_len + 3)/4; + return buf; +} + +static void +final_note(void *buf) +{ + memset(buf, 0, sizeof(struct elf_note)); +} + +extern void ia64_dump_cpu_regs(void *); + +static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus); + +void +crash_save_this_cpu() +{ + void *buf; + unsigned long cfm, sof, sol; + + int cpu = smp_processor_id(); + struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu); + + elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg); + memset(prstatus, 0, sizeof(*prstatus)); + prstatus->pr_pid = current->pid; + + ia64_dump_cpu_regs(dst); + cfm = dst[43]; + sol = (cfm >> 7) & 0x7f; + sof = cfm & 0x7f; + dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46], + sof - sol); + + buf = (u64 *) per_cpu_ptr(crash_notes, cpu); + if (!buf) + return; + buf = append_elf_note(buf, "CORE", NT_PRSTATUS, prstatus, + sizeof(*prstatus)); + final_note(buf); +} + +static int +kdump_wait_cpu_freeze(void) +{ + int cpu_num = num_online_cpus() - 1; + int timeout = 1000; + while(timeout-- > 0) { + if (atomic_read(&kdump_cpu_freezed) == cpu_num) + return 0; + udelay(1000); + } + return 1; +} + +void 
+machine_crash_shutdown(struct pt_regs *pt) +{ + /* This function is only called after the system + * has paniced or is otherwise in a critical state. + * The minimum amount of code to allow a kexec'd kernel + * to run successfully needs to happen here. + * + * In practice this means shooting down the other cpus in + * an SMP system. + */ + kexec_disable_iosapic(); +#ifdef CONFIG_SMP + kdump_smp_send_stop(); + if (kdump_wait_cpu_freeze() && kdump_on_init) { + //not all cpu response to IPI, send INIT to freeze them + kdump_smp_send_init(); + } +#endif +} + +static void +machine_kdump_on_init(void) +{ + local_irq_disable(); + kexec_disable_iosapic(); + machine_kexec(ia64_kimage); +} + +void +kdump_cpu_freeze(struct unw_frame_info *info, void *arg) +{ + int cpuid; + local_irq_disable(); + cpuid = smp_processor_id(); + crash_save_this_cpu(); + current->thread.ksp = (__u64)info->sw - 16; + atomic_inc(&kdump_cpu_freezed); + kdump_status[cpuid] = 1; + mb(); + if (cpuid == 0) { + for (;;) + cpu_relax(); + } else + ia64_jump_to_sal(&sal_boot_rendez_state[cpuid]); +} + +static int +kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) +{ + struct ia64_mca_notify_die *nd; + struct die_args *args = data; + + if (!kdump_on_init) + return NOTIFY_DONE; + + if (val != DIE_INIT_MONARCH_ENTER && + val != DIE_INIT_SLAVE_ENTER && + val != DIE_MCA_RENDZVOUS_LEAVE && + val != DIE_MCA_MONARCH_LEAVE) + return NOTIFY_DONE; + + nd = (struct ia64_mca_notify_die *)args->err; + /* Reason code 1 means machine check rendezous*/ + if ((val == DIE_INIT_MONARCH_ENTER || DIE_INIT_SLAVE_ENTER) && + nd->sos->rv_rc == 1) + return NOTIFY_DONE; + + switch (val) { + case DIE_INIT_MONARCH_ENTER: + machine_kdump_on_init(); + break; + case DIE_INIT_SLAVE_ENTER: + unw_init_running(kdump_cpu_freeze, NULL); + break; + case DIE_MCA_RENDZVOUS_LEAVE: + if (atomic_read(&kdump_in_progress)) + unw_init_running(kdump_cpu_freeze, NULL); + break; + case DIE_MCA_MONARCH_LEAVE: + /* 
die_register->signr indicate if MCA is recoverable */ + if (!args->signr) + machine_kdump_on_init(); + break; + } + return NOTIFY_DONE; +} + +#ifdef CONFIG_SYSCTL +static ctl_table kdump_on_init_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "kdump_on_init", + .data = &kdump_on_init, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0 } +}; + +static ctl_table sys_table[] = { + { + .ctl_name = CTL_KERN, + .procname = "kernel", + .mode = 0555, + .child = kdump_on_init_table, + }, + { .ctl_name = 0 } +}; +#endif + +static int +machine_crash_setup(void) +{ + static struct notifier_block kdump_init_notifier_nb = { + .notifier_call = kdump_init_notifier, + }; + int ret; + if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0) + return ret; +#ifdef CONFIG_SYSCTL + register_sysctl_table(sys_table, 0); +#endif + return 0; +} + +__initcall(machine_crash_setup); + diff --git a/arch/ia64/kernel/crash_dump.c b/arch/ia64/kernel/crash_dump.c new file mode 100644 index 000000000000..83b8c91c1408 --- /dev/null +++ b/arch/ia64/kernel/crash_dump.c @@ -0,0 +1,48 @@ +/* + * kernel/crash_dump.c - Memory preserving reboot related code. + * + * Created by: Simon Horman <horms@verge.net.au> + * Original code moved from kernel/crash.c + * Original code comment copied from the i386 version of this file + */ + +#include <linux/errno.h> +#include <linux/types.h> + +#include <linux/uaccess.h> + +/** + * copy_oldmem_page - copy one page from "oldmem" + * @pfn: page frame number to be copied + * @buf: target memory address for the copy; this can be in kernel address + * space or user address space (see @userbuf) + * @csize: number of bytes to copy + * @offset: offset in bytes into the page (based on pfn) to begin the copy + * @userbuf: if set, @buf is in user address space, use copy_to_user(), + * otherwise @buf is in kernel address space, use memcpy(). + * + * Copy a page from "oldmem". 
For this page, there is no pte mapped + * in the current kernel. We stitch up a pte, similar to kmap_atomic. + * + * Calling copy_to_user() in atomic context is not desirable. Hence first + * copying the data to a pre-allocated kernel page and then copying to user + * space in non-atomic context. + */ +ssize_t +copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, int userbuf) +{ + void *vaddr; + + if (!csize) + return 0; + vaddr = __va(pfn<<PAGE_SHIFT); + if (userbuf) { + if (copy_to_user(buf, (vaddr + offset), csize)) { + return -EFAULT; + } + } else + memcpy(buf, (vaddr + offset), csize); + return csize; +} + diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index bb8770a177b5..0b25a7d4e1e4 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -26,6 +26,7 @@ #include <linux/types.h> #include <linux/time.h> #include <linux/efi.h> +#include <linux/kexec.h> #include <asm/io.h> #include <asm/kregs.h> @@ -41,7 +42,7 @@ extern efi_status_t efi_call_phys (void *, ...); struct efi efi; EXPORT_SYMBOL(efi); static efi_runtime_services_t *runtime; -static unsigned long mem_limit = ~0UL, max_addr = ~0UL; +static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL; #define efi_call_virt(f, args...) 
(*(f))(args) @@ -224,7 +225,7 @@ efi_gettimeofday (struct timespec *ts) } static int -is_available_memory (efi_memory_desc_t *md) +is_memory_available (efi_memory_desc_t *md) { if (!(md->attribute & EFI_MEMORY_WB)) return 0; @@ -421,6 +422,8 @@ efi_init (void) mem_limit = memparse(cp + 4, &cp); } else if (memcmp(cp, "max_addr=", 9) == 0) { max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp)); + } else if (memcmp(cp, "min_addr=", 9) == 0) { + min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp)); } else { while (*cp != ' ' && *cp) ++cp; @@ -428,6 +431,8 @@ efi_init (void) ++cp; } } + if (min_addr != 0UL) + printk(KERN_INFO "Ignoring memory below %luMB\n", min_addr >> 20); if (max_addr != ~0UL) printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20); @@ -887,14 +892,15 @@ find_memmap_space (void) } contig_high = GRANULEROUNDDOWN(contig_high); } - if (!is_available_memory(md) || md->type == EFI_LOADER_DATA) + if (!is_memory_available(md) || md->type == EFI_LOADER_DATA) continue; /* Round ends inward to granule boundaries */ as = max(contig_low, md->phys_addr); ae = min(contig_high, efi_md_end(md)); - /* keep within max_addr= command line arg */ + /* keep within max_addr= and min_addr= command line arg */ + as = max(as, min_addr); ae = min(ae, max_addr); if (ae <= as) continue; @@ -962,7 +968,7 @@ efi_memmap_init(unsigned long *s, unsigned long *e) } contig_high = GRANULEROUNDDOWN(contig_high); } - if (!is_available_memory(md)) + if (!is_memory_available(md)) continue; /* @@ -1004,7 +1010,8 @@ efi_memmap_init(unsigned long *s, unsigned long *e) } else ae = efi_md_end(md); - /* keep within max_addr= command line arg */ + /* keep within max_addr= and min_addr= command line arg */ + as = max(as, min_addr); ae = min(ae, max_addr); if (ae <= as) continue; @@ -1116,6 +1123,58 @@ efi_initialize_iomem_resources(struct resource *code_resource, */ insert_resource(res, code_resource); insert_resource(res, data_resource); +#ifdef CONFIG_KEXEC + insert_resource(res, 
&efi_memmap_res); + insert_resource(res, &boot_param_res); + if (crashk_res.end > crashk_res.start) + insert_resource(res, &crashk_res); +#endif } } } + +#ifdef CONFIG_KEXEC +/* find a block of memory aligned to 64M exclude reserved regions + rsvd_regions are sorted + */ +unsigned long +kdump_find_rsvd_region (unsigned long size, + struct rsvd_region *r, int n) +{ + int i; + u64 start, end; + u64 alignment = 1UL << _PAGE_SIZE_64M; + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (!efi_wb(md)) + continue; + start = ALIGN(md->phys_addr, alignment); + end = efi_md_end(md); + for (i = 0; i < n; i++) { + if (__pa(r[i].start) >= start && __pa(r[i].end) < end) { + if (__pa(r[i].start) > start + size) + return start; + start = ALIGN(__pa(r[i].end), alignment); + if (i < n-1 && __pa(r[i+1].start) < start + size) + continue; + else + break; + } + } + if (end > start + size) + return start; + } + + printk(KERN_WARNING "Cannot reserve 0x%lx byte of memory for crashdump\n", + size); + return ~0UL; +} +#endif diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 3390b7c5a63f..15234ed3a341 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1575,7 +1575,7 @@ sys_call_table: data8 sys_mq_timedreceive // 1265 data8 sys_mq_notify data8 sys_mq_getsetattr - data8 sys_ni_syscall // reserved for kexec_load + data8 sys_kexec_load data8 sys_ni_syscall // reserved for vserver data8 sys_waitid // 1270 data8 sys_add_key diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 879c1817bd1c..bd17190bebb6 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -14,6 +14,7 @@ EXPORT_SYMBOL(strlen); #include 
<asm/checksum.h> EXPORT_SYMBOL(ip_fast_csum); /* hand-coded assembly */ +EXPORT_SYMBOL(csum_ipv6_magic); #include <asm/semaphore.h> EXPORT_SYMBOL(__down); diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 9bf15fefa7e4..0fc5fb7865cf 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -288,6 +288,27 @@ nop (unsigned int irq) /* do nothing... */ } + +#ifdef CONFIG_KEXEC +void +kexec_disable_iosapic(void) +{ + struct iosapic_intr_info *info; + struct iosapic_rte_info *rte; + u8 vec = 0; + for (info = iosapic_intr_info; info < + iosapic_intr_info + IA64_NUM_VECTORS; ++info, ++vec) { + list_for_each_entry(rte, &info->rtes, + rte_list) { + iosapic_write(rte->addr, + IOSAPIC_RTE_LOW(rte->rte_index), + IOSAPIC_MASK|vec); + iosapic_eoi(rte->addr, vec); + } + } +} +#endif + static void mask_irq (unsigned int irq) { @@ -426,7 +447,7 @@ iosapic_end_level_irq (unsigned int irq) #define iosapic_ack_level_irq nop struct hw_interrupt_type irq_type_iosapic_level = { - .typename = "IO-SAPIC-level", + .name = "IO-SAPIC-level", .startup = iosapic_startup_level_irq, .shutdown = iosapic_shutdown_level_irq, .enable = iosapic_enable_level_irq, @@ -473,7 +494,7 @@ iosapic_ack_edge_irq (unsigned int irq) #define iosapic_end_edge_irq nop struct hw_interrupt_type irq_type_iosapic_edge = { - .typename = "IO-SAPIC-edge", + .name = "IO-SAPIC-edge", .startup = iosapic_startup_edge_irq, .shutdown = iosapic_disable_edge_irq, .enable = iosapic_enable_edge_irq, @@ -664,7 +685,7 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, printk(KERN_WARNING "%s: changing vector %d from %s to %s\n", __FUNCTION__, vector, - idesc->chip->typename, irq_type->typename); + idesc->chip->name, irq_type->name); idesc->chip = irq_type; } return 0; diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index f07c0864b0b4..54d55e4d64f7 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -76,7 +76,7 @@ int show_interrupts(struct 
seq_file *p, void *v) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); } #endif - seq_printf(p, " %14s", irq_desc[i].chip->typename); + seq_printf(p, " %14s", irq_desc[i].chip->name); seq_printf(p, " %s", action->name); for (action=action->next; action; action = action->next) @@ -197,7 +197,7 @@ void fixup_irqs(void) struct pt_regs *old_regs = set_irq_regs(NULL); vectors_in_migration[irq]=0; - __do_IRQ(irq); + generic_handle_irq(irq); set_irq_regs(old_regs); } } diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 9c6dafa2d0df..ba3ba8bc50be 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -186,7 +186,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) ia64_setreg(_IA64_REG_CR_TPR, vector); ia64_srlz_d(); - __do_IRQ(local_vector_to_irq(vector)); + generic_handle_irq(local_vector_to_irq(vector)); /* * Disable interrupts and send EOI: @@ -242,7 +242,7 @@ void ia64_process_pending_intr(void) * Probably could shared code. */ vectors_in_migration[local_vector_to_irq(vector)]=0; - __do_IRQ(local_vector_to_irq(vector)); + generic_handle_irq(local_vector_to_irq(vector)); set_irq_regs(old_regs); /* diff --git a/arch/ia64/kernel/irq_lsapic.c b/arch/ia64/kernel/irq_lsapic.c index 1ab58b09f3d7..c2f07beb1759 100644 --- a/arch/ia64/kernel/irq_lsapic.c +++ b/arch/ia64/kernel/irq_lsapic.c @@ -34,7 +34,7 @@ static int lsapic_retrigger(unsigned int irq) } struct hw_interrupt_type irq_type_ia64_lsapic = { - .typename = "LSAPIC", + .name = "LSAPIC", .startup = lsapic_noop_startup, .shutdown = lsapic_noop, .enable = lsapic_noop, diff --git a/arch/ia64/kernel/jprobes.S b/arch/ia64/kernel/jprobes.S index 5cd6226f44f2..621630256c4a 100644 --- a/arch/ia64/kernel/jprobes.S +++ b/arch/ia64/kernel/jprobes.S @@ -45,13 +45,14 @@ * to the correct location. 
*/ #include <asm/asmmacro.h> +#include <asm-ia64/break.h> /* * void jprobe_break(void) */ .section .kprobes.text, "ax" ENTRY(jprobe_break) - break.m 0x80300 + break.m __IA64_BREAK_JPROBE END(jprobe_break) /* diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 51217d63285e..6cb56dd4056d 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -88,6 +88,7 @@ static void __kprobes update_kprobe_inst_flag(uint template, uint slot, { p->ainsn.inst_flag = 0; p->ainsn.target_br_reg = 0; + p->ainsn.slot = slot; /* Check for Break instruction * Bits 37:40 Major opcode to be zero @@ -129,48 +130,6 @@ static void __kprobes update_kprobe_inst_flag(uint template, uint slot, /* * In this function we check to see if the instruction - * on which we are inserting kprobe is supported. - * Returns 0 if supported - * Returns -EINVAL if unsupported - */ -static int __kprobes unsupported_inst(uint template, uint slot, - uint major_opcode, - unsigned long kprobe_inst, - unsigned long addr) -{ - if (bundle_encoding[template][slot] == I) { - switch (major_opcode) { - case 0x0: //I_UNIT_MISC_OPCODE: - /* - * Check for Integer speculation instruction - * - Bit 33-35 to be equal to 0x1 - */ - if (((kprobe_inst >> 33) & 0x7) == 1) { - printk(KERN_WARNING - "Kprobes on speculation inst at <0x%lx> not supported\n", - addr); - return -EINVAL; - } - - /* - * IP relative mov instruction - * - Bit 27-35 to be equal to 0x30 - */ - if (((kprobe_inst >> 27) & 0x1FF) == 0x30) { - printk(KERN_WARNING - "Kprobes on \"mov r1=ip\" at <0x%lx> not supported\n", - addr); - return -EINVAL; - - } - } - } - return 0; -} - - -/* - * In this function we check to see if the instruction * (qp) cmpx.crel.ctype p1,p2=r2,r3 * on which we are inserting kprobe is cmp instruction * with ctype as unc. @@ -206,26 +165,136 @@ out: } /* + * In this function we check to see if the instruction + * on which we are inserting kprobe is supported. 
+ * Returns qp value if supported + * Returns -EINVAL if unsupported + */ +static int __kprobes unsupported_inst(uint template, uint slot, + uint major_opcode, + unsigned long kprobe_inst, + unsigned long addr) +{ + int qp; + + qp = kprobe_inst & 0x3f; + if (is_cmp_ctype_unc_inst(template, slot, major_opcode, kprobe_inst)) { + if (slot == 1 && qp) { + printk(KERN_WARNING "Kprobes on cmp unc" + "instruction on slot 1 at <0x%lx>" + "is not supported\n", addr); + return -EINVAL; + + } + qp = 0; + } + else if (bundle_encoding[template][slot] == I) { + if (major_opcode == 0) { + /* + * Check for Integer speculation instruction + * - Bit 33-35 to be equal to 0x1 + */ + if (((kprobe_inst >> 33) & 0x7) == 1) { + printk(KERN_WARNING + "Kprobes on speculation inst at <0x%lx> not supported\n", + addr); + return -EINVAL; + } + /* + * IP relative mov instruction + * - Bit 27-35 to be equal to 0x30 + */ + if (((kprobe_inst >> 27) & 0x1FF) == 0x30) { + printk(KERN_WARNING + "Kprobes on \"mov r1=ip\" at <0x%lx> not supported\n", + addr); + return -EINVAL; + + } + } + else if ((major_opcode == 5) && !(kprobe_inst & (0xFUl << 33)) && + (kprobe_inst & (0x1UL << 12))) { + /* test bit instructions, tbit,tnat,tf + * bit 33-36 to be equal to 0 + * bit 12 to be equal to 1 + */ + if (slot == 1 && qp) { + printk(KERN_WARNING "Kprobes on test bit" + "instruction on slot at <0x%lx>" + "is not supported\n", addr); + return -EINVAL; + } + qp = 0; + } + } + else if (bundle_encoding[template][slot] == B) { + if (major_opcode == 7) { + /* IP-Relative Predict major code is 7 */ + printk(KERN_WARNING "Kprobes on IP-Relative" + "Predict is not supported\n"); + return -EINVAL; + } + else if (major_opcode == 2) { + /* Indirect Predict, major code is 2 + * bit 27-32 to be equal to 10 or 11 + */ + int x6=(kprobe_inst >> 27) & 0x3F; + if ((x6 == 0x10) || (x6 == 0x11)) { + printk(KERN_WARNING "Kprobes on" + "Indirect Predict is not supported\n"); + return -EINVAL; + } + } + } + /* kernel does not use float 
instruction, here for safety kprobe + * will judge whether it is fcmp/flass/float approximation instruction + */ + else if (unlikely(bundle_encoding[template][slot] == F)) { + if ((major_opcode == 4 || major_opcode == 5) && + (kprobe_inst & (0x1 << 12))) { + /* fcmp/fclass unc instruction */ + if (slot == 1 && qp) { + printk(KERN_WARNING "Kprobes on fcmp/fclass " + "instruction on slot at <0x%lx> " + "is not supported\n", addr); + return -EINVAL; + + } + qp = 0; + } + if ((major_opcode == 0 || major_opcode == 1) && + (kprobe_inst & (0x1UL << 33))) { + /* float Approximation instruction */ + if (slot == 1 && qp) { + printk(KERN_WARNING "Kprobes on float Approx " + "instr at <0x%lx> is not supported\n", + addr); + return -EINVAL; + } + qp = 0; + } + } + return qp; +} + +/* * In this function we override the bundle with * the break instruction at the given slot. */ static void __kprobes prepare_break_inst(uint template, uint slot, uint major_opcode, unsigned long kprobe_inst, - struct kprobe *p) + struct kprobe *p, + int qp) { unsigned long break_inst = BREAK_INST; bundle_t *bundle = &p->opcode.bundle; /* * Copy the original kprobe_inst qualifying predicate(qp) - * to the break instruction iff !is_cmp_ctype_unc_inst - * because for cmp instruction with ctype equal to unc, - * which is a special instruction always needs to be - * executed regradless of qp + * to the break instruction */ - if (!is_cmp_ctype_unc_inst(template, slot, major_opcode, kprobe_inst)) - break_inst |= (0x3f & kprobe_inst); + break_inst |= qp; switch (slot) { case 0: @@ -296,12 +365,6 @@ static int __kprobes valid_kprobe_addr(int template, int slot, return -EINVAL; } - if (slot == 1 && bundle_encoding[template][1] != L) { - printk(KERN_WARNING "Inserting kprobes on slot #1 " - "is not supported\n"); - return -EINVAL; - } - return 0; } @@ -427,6 +490,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) unsigned long kprobe_inst=0; unsigned int slot = addr & 0xf, template, major_opcode = 0; 
bundle_t *bundle; + int qp; bundle = &((kprobe_opcode_t *)kprobe_addr)->bundle; template = bundle->quad0.template; @@ -441,9 +505,9 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) /* Get kprobe_inst and major_opcode from the bundle */ get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode); - if (unsupported_inst(template, slot, major_opcode, kprobe_inst, addr)) - return -EINVAL; - + qp = unsupported_inst(template, slot, major_opcode, kprobe_inst, addr); + if (qp < 0) + return -EINVAL; p->ainsn.insn = get_insn_slot(); if (!p->ainsn.insn) @@ -451,37 +515,63 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) memcpy(&p->opcode, kprobe_addr, sizeof(kprobe_opcode_t)); memcpy(p->ainsn.insn, kprobe_addr, sizeof(kprobe_opcode_t)); - prepare_break_inst(template, slot, major_opcode, kprobe_inst, p); + prepare_break_inst(template, slot, major_opcode, kprobe_inst, p, qp); return 0; } void __kprobes arch_arm_kprobe(struct kprobe *p) { - unsigned long addr = (unsigned long)p->addr; - unsigned long arm_addr = addr & ~0xFULL; + unsigned long arm_addr; + bundle_t *src, *dest; + + arm_addr = ((unsigned long)p->addr) & ~0xFUL; + dest = &((kprobe_opcode_t *)arm_addr)->bundle; + src = &p->opcode.bundle; flush_icache_range((unsigned long)p->ainsn.insn, (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t)); - memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t)); + switch (p->ainsn.slot) { + case 0: + dest->quad0.slot0 = src->quad0.slot0; + break; + case 1: + dest->quad1.slot1_p1 = src->quad1.slot1_p1; + break; + case 2: + dest->quad1.slot2 = src->quad1.slot2; + break; + } flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t)); } void __kprobes arch_disarm_kprobe(struct kprobe *p) { - unsigned long addr = (unsigned long)p->addr; - unsigned long arm_addr = addr & ~0xFULL; + unsigned long arm_addr; + bundle_t *src, *dest; + arm_addr = ((unsigned long)p->addr) & ~0xFUL; + dest = &((kprobe_opcode_t *)arm_addr)->bundle; /* p->ainsn.insn contains the original 
unaltered kprobe_opcode_t */ - memcpy((char *) arm_addr, (char *) p->ainsn.insn, - sizeof(kprobe_opcode_t)); + src = &p->ainsn.insn->bundle; + switch (p->ainsn.slot) { + case 0: + dest->quad0.slot0 = src->quad0.slot0; + break; + case 1: + dest->quad1.slot1_p1 = src->quad1.slot1_p1; + break; + case 2: + dest->quad1.slot2 = src->quad1.slot2; + break; + } flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t)); } void __kprobes arch_remove_kprobe(struct kprobe *p) { mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn); + free_insn_slot(p->ainsn.insn, 0); mutex_unlock(&kprobe_mutex); } /* @@ -807,7 +897,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, switch(val) { case DIE_BREAK: /* err is break number from ia64_bad_break() */ - if (args->err == 0x80200 || args->err == 0x80300 || args->err == 0) + if ((args->err >> 12) == (__IA64_BREAK_KPROBE >> 12) + || args->err == __IA64_BREAK_JPROBE + || args->err == 0) if (pre_kprobes_handler(args)) ret = NOTIFY_STOP; break; @@ -851,7 +943,7 @@ static void ia64_get_bsp_cfm(struct unw_frame_info *info, void *arg) return; } } while (unw_unwind(info) >= 0); - lp->bsp = 0; + lp->bsp = NULL; lp->cfm = 0; return; } diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c new file mode 100644 index 000000000000..e2ccc9f660c5 --- /dev/null +++ b/arch/ia64/kernel/machine_kexec.c @@ -0,0 +1,136 @@ +/* + * arch/ia64/kernel/machine_kexec.c + * + * Handle transition of Linux booting another kernel + * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. + * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com> + * Copyright (C) 2006 Intel Corp, Zou Nan hai <nanhai.zou@intel.com> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ + +#include <linux/mm.h> +#include <linux/kexec.h> +#include <linux/cpu.h> +#include <linux/irq.h> +#include <asm/mmu_context.h> +#include <asm/setup.h> +#include <asm/delay.h> +#include <asm/meminit.h> + +typedef NORET_TYPE void (*relocate_new_kernel_t)( + unsigned long indirection_page, + unsigned long start_address, + struct ia64_boot_param *boot_param, + unsigned long pal_addr) ATTRIB_NORET; + +struct kimage *ia64_kimage; + +struct resource efi_memmap_res = { + .name = "EFI Memory Map", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +struct resource boot_param_res = { + .name = "Boot parameter", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + + +/* + * Do whatever setup is needed on image and the + * reboot code buffer to allow us to avoid allocations + * later. + */ +int machine_kexec_prepare(struct kimage *image) +{ + void *control_code_buffer; + const unsigned long *func; + + func = (unsigned long *)&relocate_new_kernel; + /* Pre-load control code buffer to minimize work in kexec path */ + control_code_buffer = page_address(image->control_code_page); + memcpy((void *)control_code_buffer, (const void *)func[0], + relocate_new_kernel_size); + flush_icache_range((unsigned long)control_code_buffer, + (unsigned long)control_code_buffer + relocate_new_kernel_size); + ia64_kimage = image; + + return 0; +} + +void machine_kexec_cleanup(struct kimage *image) +{ +} + +void machine_shutdown(void) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + cpu_down(cpu); + } + kexec_disable_iosapic(); +} + +/* + * Do not allocate memory (or fail in any way) in machine_kexec(). + * We are past the point of no return, committed to rebooting now. 
+ */ +extern void *efi_get_pal_addr(void); +static void ia64_machine_kexec(struct unw_frame_info *info, void *arg) +{ + struct kimage *image = arg; + relocate_new_kernel_t rnk; + void *pal_addr = efi_get_pal_addr(); + unsigned long code_addr = (unsigned long)page_address(image->control_code_page); + unsigned long vector; + int ii; + + if (image->type == KEXEC_TYPE_CRASH) { + crash_save_this_cpu(); + current->thread.ksp = (__u64)info->sw - 16; + } + + /* Interrupts aren't acceptable while we reboot */ + local_irq_disable(); + + /* Mask CMC and Performance Monitor interrupts */ + ia64_setreg(_IA64_REG_CR_PMV, 1 << 16); + ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16); + + /* Mask ITV and Local Redirect Registers */ + ia64_set_itv(1 << 16); + ia64_set_lrr0(1 << 16); + ia64_set_lrr1(1 << 16); + + /* terminate possible nested in-service interrupts */ + for (ii = 0; ii < 16; ii++) + ia64_eoi(); + + /* unmask TPR and clear any pending interrupts */ + ia64_setreg(_IA64_REG_CR_TPR, 0); + ia64_srlz_d(); + vector = ia64_get_ivr(); + while (vector != IA64_SPURIOUS_INT_VECTOR) { + ia64_eoi(); + vector = ia64_get_ivr(); + } + platform_kernel_launch_event(); + rnk = (relocate_new_kernel_t)&code_addr; + (*rnk)(image->head, image->start, ia64_boot_param, + GRANULEROUNDDOWN((unsigned long) pal_addr)); + BUG(); +} + +void machine_kexec(struct kimage *image) +{ + unw_init_running(ia64_machine_kexec, image); + for(;;); +} diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 7cfa63a98cb3..a76add3e76a2 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -82,6 +82,7 @@ #include <asm/system.h> #include <asm/sal.h> #include <asm/mca.h> +#include <asm/kexec.h> #include <asm/irq.h> #include <asm/hw_irq.h> @@ -678,7 +679,7 @@ ia64_mca_cmc_vector_enable (void *dummy) * disable the cmc interrupt vector. 
*/ static void -ia64_mca_cmc_vector_disable_keventd(void *unused) +ia64_mca_cmc_vector_disable_keventd(struct work_struct *unused) { on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 1, 0); } @@ -690,7 +691,7 @@ ia64_mca_cmc_vector_disable_keventd(void *unused) * enable the cmc interrupt vector. */ static void -ia64_mca_cmc_vector_enable_keventd(void *unused) +ia64_mca_cmc_vector_enable_keventd(struct work_struct *unused) { on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 1, 0); } @@ -1238,6 +1239,10 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, } else { /* Dump buffered message to console */ ia64_mlogbuf_finish(1); +#ifdef CONFIG_KEXEC + atomic_set(&kdump_in_progress, 1); + monarch_cpu = -1; +#endif } if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) == NOTIFY_STOP) @@ -1247,8 +1252,8 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, monarch_cpu = -1; } -static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd, NULL); -static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd, NULL); +static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd); +static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd); /* * ia64_mca_cmc_int_handler diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index a45009d2bc90..afc1403799c9 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -435,6 +435,50 @@ is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci, } /** + * get_target_identifier - Get the valid Cache or Bus check target identifier. 
+ * @peidx: pointer of index of processor error section + * + * Return value: + * target address on Success / 0 on Failure + */ +static u64 +get_target_identifier(peidx_table_t *peidx) +{ + u64 target_address = 0; + sal_log_mod_error_info_t *smei; + pal_cache_check_info_t *pcci; + int i, level = 9; + + /* + * Look through the cache checks for a valid target identifier + * If more than one valid target identifier, return the one + * with the lowest cache level. + */ + for (i = 0; i < peidx_cache_check_num(peidx); i++) { + smei = (sal_log_mod_error_info_t *)peidx_cache_check(peidx, i); + if (smei->valid.target_identifier && smei->target_identifier) { + pcci = (pal_cache_check_info_t *)&(smei->check_info); + if (!target_address || (pcci->level < level)) { + target_address = smei->target_identifier; + level = pcci->level; + continue; + } + } + } + if (target_address) + return target_address; + + /* + * Look at the bus check for a valid target identifier + */ + smei = peidx_bus_check(peidx, 0); + if (smei && smei->valid.target_identifier) + return smei->target_identifier; + + return 0; +} + +/** * recover_from_read_error - Try to recover the errors which type are "read"s. * @slidx: pointer of index of SAL error record * @peidx: pointer of index of processor error section @@ -450,13 +494,14 @@ recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci, struct ia64_sal_os_state *sos) { - sal_log_mod_error_info_t *smei; + u64 target_identifier; pal_min_state_area_t *pmsa; struct ia64_psr *psr1, *psr2; ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook; /* Is target address valid? 
*/ - if (!pbci->tv) + target_identifier = get_target_identifier(peidx); + if (!target_identifier) return fatal_mca("target address not valid"); /* @@ -487,32 +532,28 @@ recover_from_read_error(slidx_table_t *slidx, pmsa = sos->pal_min_state; if (psr1->cpl != 0 || ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) { - smei = peidx_bus_check(peidx, 0); - if (smei->valid.target_identifier) { - /* - * setup for resume to bottom half of MCA, - * "mca_handler_bhhook" - */ - /* pass to bhhook as argument (gr8, ...) */ - pmsa->pmsa_gr[8-1] = smei->target_identifier; - pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip; - pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr; - /* set interrupted return address (but no use) */ - pmsa->pmsa_br0 = pmsa->pmsa_iip; - /* change resume address to bottom half */ - pmsa->pmsa_iip = mca_hdlr_bh->fp; - pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp; - /* set cpl with kernel mode */ - psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr; - psr2->cpl = 0; - psr2->ri = 0; - psr2->bn = 1; - psr2->i = 0; - - return mca_recovered("user memory corruption. " + /* + * setup for resume to bottom half of MCA, + * "mca_handler_bhhook" + */ + /* pass to bhhook as argument (gr8, ...) */ + pmsa->pmsa_gr[8-1] = target_identifier; + pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip; + pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr; + /* set interrupted return address (but no use) */ + pmsa->pmsa_br0 = pmsa->pmsa_iip; + /* change resume address to bottom half */ + pmsa->pmsa_iip = mca_hdlr_bh->fp; + pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp; + /* set cpl with kernel mode */ + psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr; + psr2->cpl = 0; + psr2->ri = 0; + psr2->bn = 1; + psr2->i = 0; + + return mca_recovered("user memory corruption. 
" "kill affected process - recovered."); - } - } return fatal_mca("kernel context not recovered, iip 0x%lx\n", diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index 0b546e2b36ac..a71df9ae0397 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -16,6 +16,7 @@ * 02/05/2001 S.Eranian fixed module support * 10/23/2001 S.Eranian updated pal_perf_mon_info bug fixes * 03/24/2004 Ashok Raj updated to work with CPU Hotplug + * 10/26/2006 Russ Anderson updated processor features to rev 2.2 spec */ #include <linux/types.h> #include <linux/errno.h> @@ -314,13 +315,20 @@ vm_info(char *page) "Protection Key Registers(PKR) : %d\n" "Implemented bits in PKR.key : %d\n" "Hash Tag ID : 0x%x\n" - "Size of RR.rid : %d\n", + "Size of RR.rid : %d\n" + "Max Purges : ", vm_info_1.pal_vm_info_1_s.phys_add_size, vm_info_2.pal_vm_info_2_s.impl_va_msb+1, vm_info_1.pal_vm_info_1_s.max_pkr+1, vm_info_1.pal_vm_info_1_s.key_size, vm_info_1.pal_vm_info_1_s.hash_tag_id, vm_info_2.pal_vm_info_2_s.rid_size); + if (vm_info_2.pal_vm_info_2_s.max_purges == PAL_MAX_PURGES) + p += sprintf(p, "unlimited\n"); + else + p += sprintf(p, "%d\n", + vm_info_2.pal_vm_info_2_s.max_purges ? 
+ vm_info_2.pal_vm_info_2_s.max_purges : 1); } if (ia64_pal_mem_attrib(&attrib) == 0) { @@ -467,7 +475,11 @@ static const char *proc_features[]={ NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL, NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,NULL,NULL,NULL, NULL,NULL,NULL,NULL, - NULL,NULL,NULL,NULL,NULL, + "Unimplemented instruction address fault", + "INIT, PMI, and LINT pins", + "Simple unimplemented instr addresses", + "Variable P-state performance", + "Virtual machine features implemented", "XIP,XPSR,XFS implemented", "XR1-XR3 implemented", "Disable dynamic predicate prediction", @@ -475,7 +487,11 @@ static const char *proc_features[]={ "Disable dynamic data cache prefetch", "Disable dynamic inst cache prefetch", "Disable dynamic branch prediction", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "Disable P-states", + "Enable MCA on Data Poisoning", + "Enable vmsw instruction", + "Enable extern environmental notification", "Disable BINIT on processor time-out", "Disable dynamic power management (DPM)", "Disable coherency", @@ -952,7 +968,6 @@ remove_palinfo_proc_entries(unsigned int hcpu) } } -#ifdef CONFIG_HOTPLUG_CPU static int palinfo_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -974,7 +989,6 @@ static struct notifier_block palinfo_cpu_notifier = .notifier_call = palinfo_cpu_callback, .priority = 0, }; -#endif static int __init palinfo_init(void) diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 3aaede0d6981..aa94f60fa8e7 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -853,9 +853,8 @@ pfm_context_alloc(void) * allocate context descriptor * must be able to free with interrupts disabled */ - ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL); + ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL); if (ctx) { - memset(ctx, 0, sizeof(pfm_context_t)); DPRINT(("alloc ctx @%p\n", ctx)); } return ctx; @@ -2189,13 +2188,13 @@ pfm_alloc_fd(struct 
file **cfile) /* * allocate a new dcache entry */ - file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this); - if (!file->f_dentry) goto out; + file->f_path.dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this); + if (!file->f_path.dentry) goto out; - file->f_dentry->d_op = &pfmfs_dentry_operations; + file->f_path.dentry->d_op = &pfmfs_dentry_operations; - d_add(file->f_dentry, inode); - file->f_vfsmnt = mntget(pfmfs_mnt); + d_add(file->f_path.dentry, inode); + file->f_path.mnt = mntget(pfmfs_mnt); file->f_mapping = inode->i_mapping; file->f_op = &pfm_file_ops; @@ -2302,7 +2301,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon DPRINT(("smpl_buf @%p\n", smpl_buf)); /* allocate vma */ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!vma) { DPRINT(("Cannot allocate vma\n")); goto error_kmem; diff --git a/arch/ia64/kernel/perfmon_montecito.h b/arch/ia64/kernel/perfmon_montecito.h index cd06ac6a686c..7f8da4c7ca67 100644 --- a/arch/ia64/kernel/perfmon_montecito.h +++ b/arch/ia64/kernel/perfmon_montecito.h @@ -45,16 +45,16 @@ static pfm_reg_desc_t pfm_mont_pmc_desc[PMU_MAX_PMCS]={ /* pmc29 */ { PFM_REG_NOTIMPL, }, /* pmc30 */ { PFM_REG_NOTIMPL, }, /* pmc31 */ { PFM_REG_NOTIMPL, }, -/* pmc32 */ { PFM_REG_CONFIG, 0, 0x30f01ffffffffff, 0x30f01ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc33 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc34 */ { PFM_REG_CONFIG, 0, 0xf01ffffffffff, 0xf01ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc35 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc32 */ { PFM_REG_CONFIG, 0, 0x30f01ffffffffffUL, 0x30f01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc33 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* 
pmc34 */ { PFM_REG_CONFIG, 0, 0xf01ffffffffffUL, 0xf01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc35 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, /* pmc36 */ { PFM_REG_CONFIG, 0, 0xfffffff0, 0xf, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, /* pmc37 */ { PFM_REG_MONITOR, 4, 0x0, 0x3fff, NULL, pfm_mont_pmc_check, {RDEP_MONT_IEAR, 0, 0, 0}, {0, 0, 0, 0}}, /* pmc38 */ { PFM_REG_CONFIG, 0, 0xdb6, 0x2492, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, /* pmc39 */ { PFM_REG_MONITOR, 6, 0x0, 0xffcf, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}}, /* pmc40 */ { PFM_REG_MONITOR, 6, 0x2000000, 0xf01cf, NULL, pfm_mont_pmc_check, {RDEP_MONT_DEAR,0, 0, 0}, {0,0, 0, 0}}, -/* pmc41 */ { PFM_REG_CONFIG, 0, 0x00002078fefefefe, 0x1e00018181818, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, +/* pmc41 */ { PFM_REG_CONFIG, 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, /* pmc42 */ { PFM_REG_MONITOR, 6, 0x0, 0x7ff4f, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}}, { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */ }; @@ -185,7 +185,7 @@ pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cn DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, tmpval, ctx->ctx_fl_using_dbreg, is_loaded)); if (cnum == 41 && is_loaded - && (tmpval & 0x1e00000000000) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) { + && (tmpval & 0x1e00000000000UL) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) { DPRINT(("pmc[%d]=0x%lx has active pmc41 settings, clearing dbr\n", cnum, tmpval)); diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S new file mode 100644 index 000000000000..ae473e3f2a0d --- /dev/null +++ b/arch/ia64/kernel/relocate_kernel.S @@ -0,0 +1,334 @@ +/* + * 
arch/ia64/kernel/relocate_kernel.S + * + * Relocate kexec'able kernel and start it + * + * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. + * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com> + * Copyright (C) 2005 Intel Corp, Zou Nan hai <nanhai.zou@intel.com> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ +#include <asm/asmmacro.h> +#include <asm/kregs.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/mca_asm.h> + + /* Must be relocatable PIC code callable as a C function + */ +GLOBAL_ENTRY(relocate_new_kernel) + .prologue + alloc r31=ar.pfs,4,0,0,0 + .body +.reloc_entry: +{ + rsm psr.i| psr.ic + mov r2=ip +} + ;; +{ + flushrs // must be first insn in group + srlz.i +} + ;; + dep r2=0,r2,61,3 //to physical address + ;; + //first switch to physical mode + add r3=1f-.reloc_entry, r2 + movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC + mov ar.rsc=0 // put RSE in enforced lazy mode + ;; + add sp=(memory_stack_end - 16 - .reloc_entry),r2 + add r8=(register_stack - .reloc_entry),r2 + ;; + mov r18=ar.rnat + mov ar.bspstore=r8 + ;; + mov cr.ipsr=r16 + mov cr.iip=r3 + mov cr.ifs=r0 + srlz.i + ;; + mov ar.rnat=r18 + rfi + ;; +1: + //physical mode code begin + mov b6=in1 + dep r28=0,in2,61,3 //to physical address + + // purge all TC entries +#define O(member) IA64_CPUINFO_##member##_OFFSET + GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2 + ;; + addl r17=O(PTCE_STRIDE),r2 + addl r2=O(PTCE_BASE),r2 + ;; + ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base + ld4 r19=[r2],4 // r19=ptce_count[0] + ld4 r21=[r17],4 // r21=ptce_stride[0] + ;; + ld4 r20=[r2] // r20=ptce_count[1] + ld4 r22=[r17] // r22=ptce_stride[1] + mov r24=r0 + ;; + adds r20=-1,r20 + ;; +#undef O +2: + cmp.ltu p6,p7=r24,r19 +(p7) br.cond.dpnt.few 4f + mov ar.lc=r20 +3: + ptc.e r18 + ;; + add r18=r22,r18 + br.cloop.sptk.few 3b + ;; + add r18=r21,r18 + add r24=1,r24 + 
;; + br.sptk.few 2b +4: + srlz.i + ;; + //purge TR entry for kernel text and data + movl r16=KERNEL_START + mov r18=KERNEL_TR_PAGE_SHIFT<<2 + ;; + ptr.i r16, r18 + ptr.d r16, r18 + ;; + srlz.i + ;; + + // purge TR entry for percpu data + movl r16=PERCPU_ADDR + mov r18=PERCPU_PAGE_SHIFT<<2 + ;; + ptr.d r16,r18 + ;; + srlz.d + ;; + + // purge TR entry for pal code + mov r16=in3 + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.i r16,r18 + ;; + srlz.i + ;; + + // purge TR entry for stack + mov r16=IA64_KR(CURRENT_STACK) + ;; + shl r16=r16,IA64_GRANULE_SHIFT + movl r19=PAGE_OFFSET + ;; + add r16=r19,r16 + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.d r16,r18 + ;; + srlz.i + ;; + + //copy segments + movl r16=PAGE_MASK + mov r30=in0 // in0 is page_list + br.sptk.few .dest_page + ;; +.loop: + ld8 r30=[in0], 8;; +.dest_page: + tbit.z p0, p6=r30, 0;; // 0x1 dest page +(p6) and r17=r30, r16 +(p6) br.cond.sptk.few .loop;; + + tbit.z p0, p6=r30, 1;; // 0x2 indirect page +(p6) and in0=r30, r16 +(p6) br.cond.sptk.few .loop;; + + tbit.z p0, p6=r30, 2;; // 0x4 end flag +(p6) br.cond.sptk.few .end_loop;; + + tbit.z p6, p0=r30, 3;; // 0x8 source page +(p6) br.cond.sptk.few .loop + + and r18=r30, r16 + + // simple copy page, may optimize later + movl r14=PAGE_SIZE/8 - 1;; + mov ar.lc=r14;; +1: + ld8 r14=[r18], 8;; + st8 [r17]=r14;; + fc.i r17 + add r17=8, r17 + br.ctop.sptk.few 1b + br.sptk.few .loop + ;; + +.end_loop: + sync.i // for fc.i + ;; + srlz.i + ;; + srlz.d + ;; + br.call.sptk.many b0=b6;; + +.align 32 +memory_stack: + .fill 8192, 1, 0 +memory_stack_end: +register_stack: + .fill 8192, 1, 0 +register_stack_end: +relocate_new_kernel_end: +END(relocate_new_kernel) + +.global relocate_new_kernel_size +relocate_new_kernel_size: + data8 relocate_new_kernel_end - relocate_new_kernel + +GLOBAL_ENTRY(ia64_dump_cpu_regs) + .prologue + alloc loc0=ar.pfs,1,2,0,0 + .body + mov ar.rsc=0 // put RSE in enforced lazy mode + add loc1=4*8, in0 // save r4 and r5 first + ;; +{ + flushrs // flush dirty 
regs to backing store + srlz.i +} + st8 [loc1]=r4, 8 + ;; + st8 [loc1]=r5, 8 + ;; + add loc1=32*8, in0 + mov r4=ar.rnat + ;; + st8 [in0]=r0, 8 // r0 + st8 [loc1]=r4, 8 // rnat + mov r5=pr + ;; + st8 [in0]=r1, 8 // r1 + st8 [loc1]=r5, 8 // pr + mov r4=b0 + ;; + st8 [in0]=r2, 8 // r2 + st8 [loc1]=r4, 8 // b0 + mov r5=b1; + ;; + st8 [in0]=r3, 24 // r3 + st8 [loc1]=r5, 8 // b1 + mov r4=b2 + ;; + st8 [in0]=r6, 8 // r6 + st8 [loc1]=r4, 8 // b2 + mov r5=b3 + ;; + st8 [in0]=r7, 8 // r7 + st8 [loc1]=r5, 8 // b3 + mov r4=b4 + ;; + st8 [in0]=r8, 8 // r8 + st8 [loc1]=r4, 8 // b4 + mov r5=b5 + ;; + st8 [in0]=r9, 8 // r9 + st8 [loc1]=r5, 8 // b5 + mov r4=b6 + ;; + st8 [in0]=r10, 8 // r10 + st8 [loc1]=r5, 8 // b6 + mov r5=b7 + ;; + st8 [in0]=r11, 8 // r11 + st8 [loc1]=r5, 8 // b7 + mov r4=b0 + ;; + st8 [in0]=r12, 8 // r12 + st8 [loc1]=r4, 8 // ip + mov r5=loc0 + ;; + st8 [in0]=r13, 8 // r13 + extr.u r5=r5, 0, 38 // ar.pfs.pfm + mov r4=r0 // user mask + ;; + st8 [in0]=r14, 8 // r14 + st8 [loc1]=r5, 8 // cfm + ;; + st8 [in0]=r15, 8 // r15 + st8 [loc1]=r4, 8 // user mask + mov r5=ar.rsc + ;; + st8 [in0]=r16, 8 // r16 + st8 [loc1]=r5, 8 // ar.rsc + mov r4=ar.bsp + ;; + st8 [in0]=r17, 8 // r17 + st8 [loc1]=r4, 8 // ar.bsp + mov r5=ar.bspstore + ;; + st8 [in0]=r18, 8 // r18 + st8 [loc1]=r5, 8 // ar.bspstore + mov r4=ar.rnat + ;; + st8 [in0]=r19, 8 // r19 + st8 [loc1]=r4, 8 // ar.rnat + mov r5=ar.ccv + ;; + st8 [in0]=r20, 8 // r20 + st8 [loc1]=r5, 8 // ar.ccv + mov r4=ar.unat + ;; + st8 [in0]=r21, 8 // r21 + st8 [loc1]=r4, 8 // ar.unat + mov r5 = ar.fpsr + ;; + st8 [in0]=r22, 8 // r22 + st8 [loc1]=r5, 8 // ar.fpsr + mov r4 = ar.unat + ;; + st8 [in0]=r23, 8 // r23 + st8 [loc1]=r4, 8 // unat + mov r5 = ar.fpsr + ;; + st8 [in0]=r24, 8 // r24 + st8 [loc1]=r5, 8 // fpsr + mov r4 = ar.pfs + ;; + st8 [in0]=r25, 8 // r25 + st8 [loc1]=r4, 8 // ar.pfs + mov r5 = ar.lc + ;; + st8 [in0]=r26, 8 // r26 + st8 [loc1]=r5, 8 // ar.lc + mov r4 = ar.ec + ;; + st8 [in0]=r27, 8 // r27 + st8 [loc1]=r4, 8 // 
ar.ec + mov r5 = ar.csd + ;; + st8 [in0]=r28, 8 // r28 + st8 [loc1]=r5, 8 // ar.csd + mov r4 = ar.ssd + ;; + st8 [in0]=r29, 8 // r29 + st8 [loc1]=r4, 8 // ar.ssd + ;; + st8 [in0]=r30, 8 // r30 + ;; + st8 [in0]=r31, 8 // r31 + mov ar.pfs=loc0 + ;; + br.ret.sptk.many rp +END(ia64_dump_cpu_regs) + + diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c index 642fdc7b969d..20bad78b5073 100644 --- a/arch/ia64/kernel/sal.c +++ b/arch/ia64/kernel/sal.c @@ -223,12 +223,13 @@ static void __init sal_desc_ap_wakeup(void *p) { } */ static int sal_cache_flush_drops_interrupts; -static void __init +void __init check_sal_cache_flush (void) { unsigned long flags; int cpu; - u64 vector; + u64 vector, cache_type = 3; + struct ia64_sal_retval isrv; cpu = get_cpu(); local_irq_save(flags); @@ -243,7 +244,10 @@ check_sal_cache_flush (void) while (!ia64_get_irr(IA64_TIMER_VECTOR)) cpu_relax(); - ia64_sal_cache_flush(3); + SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0); + + if (isrv.status) + printk(KERN_ERR "SAL_CAL_FLUSH failed with %ld\n", isrv.status); if (ia64_get_irr(IA64_TIMER_VECTOR)) { vector = ia64_get_ivr(); @@ -331,7 +335,6 @@ ia64_sal_init (struct ia64_sal_systab *systab) p += SAL_DESC_SIZE(*p); } - check_sal_cache_flush(); } int diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index e63b8ca5344a..e375a2f0f2c3 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -302,7 +302,7 @@ salinfo_event_open(struct inode *inode, struct file *file) static ssize_t salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file->f_path.dentry->d_inode; struct proc_dir_entry *entry = PDE(inode); struct salinfo_data *data = entry->data; char cmd[32]; @@ -464,7 +464,7 @@ retry: static ssize_t salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { - struct inode *inode = file->f_dentry->d_inode; + 
struct inode *inode = file->f_path.dentry->d_inode; struct proc_dir_entry *entry = PDE(inode); struct salinfo_data *data = entry->data; u8 *buf; @@ -525,7 +525,7 @@ salinfo_log_clear(struct salinfo_data *data, int cpu) static ssize_t salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file->f_path.dentry->d_inode; struct proc_dir_entry *entry = PDE(inode); struct salinfo_data *data = entry->data; char cmd[32]; @@ -575,7 +575,6 @@ static struct file_operations salinfo_data_fops = { .write = salinfo_log_write, }; -#ifdef CONFIG_HOTPLUG_CPU static int __devinit salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) { @@ -620,7 +619,6 @@ static struct notifier_block salinfo_cpu_notifier = .notifier_call = salinfo_cpu_callback, .priority = 0, }; -#endif /* CONFIG_HOTPLUG_CPU */ static int __init salinfo_init(void) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index c4caa8003492..ad567b8d432e 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -43,6 +43,8 @@ #include <linux/initrd.h> #include <linux/pm.h> #include <linux/cpufreq.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> #include <asm/ia32.h> #include <asm/machvec.h> @@ -252,6 +254,47 @@ reserve_memory (void) efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end); n++; +#ifdef CONFIG_KEXEC + /* crashkernel=size@offset specifies the size to reserve for a crash + * kernel. If offset is 0, then it is determined automatically. + * By reserving this memory we guarantee that linux never sets it + * up as a DMA target. Useful for holding code to do something + * appropriate after a kernel panic. 
+ */ + { + char *from = strstr(saved_command_line, "crashkernel="); + unsigned long base, size; + if (from) { + size = memparse(from + 12, &from); + if (*from == '@') + base = memparse(from+1, &from); + else + base = 0; + if (size) { + if (!base) { + sort_regions(rsvd_region, n); + base = kdump_find_rsvd_region(size, + rsvd_region, n); + } + if (base != ~0UL) { + rsvd_region[n].start = + (unsigned long)__va(base); + rsvd_region[n].end = + (unsigned long)__va(base + size); + n++; + crashk_res.start = base; + crashk_res.end = base + size - 1; + } + } + } + efi_memmap_res.start = ia64_boot_param->efi_memmap; + efi_memmap_res.end = efi_memmap_res.start + + ia64_boot_param->efi_memmap_size; + boot_param_res.start = __pa(ia64_boot_param); + boot_param_res.end = boot_param_res.start + + sizeof(*ia64_boot_param); + } +#endif /* end of memory marker */ rsvd_region[n].start = ~0UL; rsvd_region[n].end = ~0UL; @@ -263,6 +306,7 @@ reserve_memory (void) sort_regions(rsvd_region, num_rsvd_regions); } + /** * find_initrd - get initrd parameters from the boot parameter structure * @@ -396,6 +440,21 @@ static __init int setup_nomca(char *s) } early_param("nomca", setup_nomca); +#ifdef CONFIG_PROC_VMCORE +/* elfcorehdr= specifies the location of elf core header + * stored by the crashed kernel. 
+ */ +static int __init parse_elfcorehdr(char *arg) +{ + if (!arg) + return -EINVAL; + + elfcorehdr_addr = memparse(arg, &arg); + return 0; +} +early_param("elfcorehdr", parse_elfcorehdr); +#endif /* CONFIG_PROC_VMCORE */ + void __init setup_arch (char **cmdline_p) { @@ -457,6 +516,8 @@ setup_arch (char **cmdline_p) cpu_init(); /* initialize the bootstrap CPU */ mmu_context_init(); /* initialize context_id bitmap */ + check_sal_cache_flush(); + #ifdef CONFIG_ACPI acpi_boot_init(); #endif @@ -613,6 +674,7 @@ get_model_name(__u8 family, __u8 model) { char brand[128]; + memcpy(brand, "Unknown", 8); if (ia64_pal_get_brand_info(brand)) { if (family == 0x7) memcpy(brand, "Merced", 7); @@ -620,8 +682,7 @@ get_model_name(__u8 family, __u8 model) case 0: memcpy(brand, "McKinley", 9); break; case 1: memcpy(brand, "Madison", 8); break; case 2: memcpy(brand, "Madison up to 9M cache", 23); break; - } else - memcpy(brand, "Unknown", 8); + } } if (brandname[0] == '\0') return strcpy(brandname, brand); diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 657ac99a451c..f4c7f7769cf7 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -30,6 +30,7 @@ #include <linux/delay.h> #include <linux/efi.h> #include <linux/bitops.h> +#include <linux/kexec.h> #include <asm/atomic.h> #include <asm/current.h> @@ -66,6 +67,7 @@ static volatile struct call_data_struct *call_data; #define IPI_CALL_FUNC 0 #define IPI_CPU_STOP 1 +#define IPI_KDUMP_CPU_STOP 3 /* This needs to be cacheline aligned because it is written to by *other* CPUs. 
*/ static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned; @@ -108,7 +110,7 @@ cpu_die(void) } irqreturn_t -handle_IPI (int irq, void *dev_id, struct pt_regs *regs) +handle_IPI (int irq, void *dev_id) { int this_cpu = get_cpu(); unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation); @@ -155,7 +157,11 @@ handle_IPI (int irq, void *dev_id, struct pt_regs *regs) case IPI_CPU_STOP: stop_this_cpu(); break; - +#ifdef CONFIG_KEXEC + case IPI_KDUMP_CPU_STOP: + unw_init_running(kdump_cpu_freeze, NULL); + break; +#endif default: printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which); break; @@ -213,6 +219,26 @@ send_IPI_self (int op) send_IPI_single(smp_processor_id(), op); } +#ifdef CONFIG_KEXEC +void +kdump_smp_send_stop() +{ + send_IPI_allbutself(IPI_KDUMP_CPU_STOP); +} + +void +kdump_smp_send_init() +{ + unsigned int cpu, self_cpu; + self_cpu = smp_processor_id(); + for_each_online_cpu(cpu) { + if (cpu != self_cpu) { + if(kdump_status[cpu] == 0) + platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0); + } + } +} +#endif /* * Called with preeemption disabled. 
*/ @@ -328,10 +354,14 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait) { struct call_data_struct data; - int cpus = num_online_cpus()-1; + int cpus; - if (!cpus) + spin_lock(&call_lock); + cpus = num_online_cpus() - 1; + if (!cpus) { + spin_unlock(&call_lock); return 0; + } /* Can deadlock when called with interrupts disabled */ WARN_ON(irqs_disabled()); @@ -343,8 +373,6 @@ smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wai if (wait) atomic_set(&data.finished, 0); - spin_lock(&call_lock); - call_data = &data; mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC */ send_IPI_allbutself(IPI_CALL_FUNC); diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index f7d7f5668144..b21ddecea943 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -463,15 +463,17 @@ struct pt_regs * __devinit idle_regs(struct pt_regs *regs) } struct create_idle { + struct work_struct work; struct task_struct *idle; struct completion done; int cpu; }; void -do_fork_idle(void *_c_idle) +do_fork_idle(struct work_struct *work) { - struct create_idle *c_idle = _c_idle; + struct create_idle *c_idle = + container_of(work, struct create_idle, work); c_idle->idle = fork_idle(c_idle->cpu); complete(&c_idle->done); @@ -482,10 +484,10 @@ do_boot_cpu (int sapicid, int cpu) { int timeout; struct create_idle c_idle = { + .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle), .cpu = cpu, .done = COMPLETION_INITIALIZER(c_idle.done), }; - DECLARE_WORK(work, do_fork_idle, &c_idle); c_idle.idle = get_idle_for_cpu(cpu); if (c_idle.idle) { @@ -497,9 +499,9 @@ do_boot_cpu (int sapicid, int cpu) * We can't use kernel_thread since we must avoid to reschedule the child. 
*/ if (!keventd_up() || current_is_keventd()) - work.func(work.data); + c_idle.work.func(&c_idle.work); else { - schedule_work(&work); + schedule_work(&c_idle.work); wait_for_completion(&c_idle.done); } diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 5629b45e89c6..687500ddb4b8 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -31,11 +31,11 @@ int arch_register_cpu(int num) { #if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU) /* - * If CPEI cannot be re-targetted, and this is - * CPEI target, then dont create the control file + * If CPEI can be re-targetted or if this is not + * CPEI target, then it is hotpluggable */ - if (!can_cpei_retarget() && is_cpu_cpei_target(num)) - sysfs_cpus[num].cpu.no_control = 1; + if (can_cpei_retarget() || !is_cpu_cpei_target(num)) + sysfs_cpus[num].cpu.hotpluggable = 1; map_cpu_to_node(num, node_cpuid[num].nid); #endif diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index fffa9e0826bc..ab684747036f 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -307,6 +307,15 @@ fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long return ret.status; } +struct fpu_swa_msg { + unsigned long count; + unsigned long time; +}; +static DEFINE_PER_CPU(struct fpu_swa_msg, cpulast); +DECLARE_PER_CPU(struct fpu_swa_msg, cpulast); +static struct fpu_swa_msg last __cacheline_aligned; + + /* * Handle floating-point assist faults and traps. 
*/ @@ -316,8 +325,6 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) long exception, bundle[2]; unsigned long fault_ip; struct siginfo siginfo; - static int fpu_swa_count = 0; - static unsigned long last_time; fault_ip = regs->cr_iip; if (!fp_fault && (ia64_psr(regs)->ri == 0)) @@ -325,14 +332,37 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) if (copy_from_user(bundle, (void __user *) fault_ip, sizeof(bundle))) return -1; - if (jiffies - last_time > 5*HZ) - fpu_swa_count = 0; - if ((fpu_swa_count < 4) && !(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT)) { - last_time = jiffies; - ++fpu_swa_count; - printk(KERN_WARNING - "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n", - current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr); + if (!(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT)) { + unsigned long count, current_jiffies = jiffies; + struct fpu_swa_msg *cp = &__get_cpu_var(cpulast); + + if (unlikely(current_jiffies > cp->time)) + cp->count = 0; + if (unlikely(cp->count < 5)) { + cp->count++; + cp->time = current_jiffies + 5 * HZ; + + /* minimize races by grabbing a copy of count BEFORE checking last.time. */ + count = last.count; + barrier(); + + /* + * Lower 4 bits are used as a count. Upper bits are a sequence + * number that is updated when count is reset. The cmpxchg will + * fail if seqno has changed. This minimizes multiple cpus + * resetting the count. 
+ */ + if (current_jiffies > last.time) + (void) cmpxchg_acq(&last.count, count, 16 + (count & ~15)); + + /* use fetchadd to atomically update the count */ + if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) { + last.time = current_jiffies + 5 * HZ; + printk(KERN_WARNING + "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n", + current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr); + } + } } exception = fp_emulate(fp_fault, bundle, &regs->cr_ipsr, &regs->ar_fpsr, &isr, &regs->pr, diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index b3b2e389d6b2..d6083a0936f4 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -128,13 +128,7 @@ SECTIONS .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { __initcall_start = .; - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS __initcall_end = .; } diff --git a/arch/ia64/lib/checksum.c b/arch/ia64/lib/checksum.c index beb11721d9f5..4411d9baeb21 100644 --- a/arch/ia64/lib/checksum.c +++ b/arch/ia64/lib/checksum.c @@ -33,32 +33,32 @@ from64to16 (unsigned long x) * computes the checksum of the TCP/UDP pseudo-header * returns a 16-bit checksum, already complemented. 
*/ -unsigned short int -csum_tcpudp_magic (unsigned long saddr, unsigned long daddr, unsigned short len, - unsigned short proto, unsigned int sum) +__sum16 +csum_tcpudp_magic (__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum) { - return ~from64to16(saddr + daddr + sum + ((unsigned long) ntohs(len) << 16) + - ((unsigned long) proto << 8)); + return (__force __sum16)~from64to16( + (__force u64)saddr + (__force u64)daddr + + (__force u64)sum + ((len + proto) << 8)); } EXPORT_SYMBOL(csum_tcpudp_magic); -unsigned int -csum_tcpudp_nofold (unsigned long saddr, unsigned long daddr, unsigned short len, - unsigned short proto, unsigned int sum) +__wsum +csum_tcpudp_nofold (__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum) { unsigned long result; - result = (saddr + daddr + sum + - ((unsigned long) ntohs(len) << 16) + - ((unsigned long) proto << 8)); + result = (__force u64)saddr + (__force u64)daddr + + (__force u64)sum + ((len + proto) << 8); /* Fold down to 32-bits so we don't lose in the typedef-less network stack. */ /* 64 to 33 */ result = (result & 0xffffffff) + (result >> 32); /* 33 to 32 */ result = (result & 0xffffffff) + (result >> 32); - return result; + return (__force __wsum)result; } extern unsigned long do_csum (const unsigned char *, long); @@ -75,16 +75,15 @@ extern unsigned long do_csum (const unsigned char *, long); * * it's best to have buff aligned on a 32-bit boundary */ -unsigned int -csum_partial (const unsigned char * buff, int len, unsigned int sum) +__wsum csum_partial(const void *buff, int len, __wsum sum) { - unsigned long result = do_csum(buff, len); + u64 result = do_csum(buff, len); /* add in old sum, and carry.. 
*/ - result += sum; + result += (__force u32)sum; /* 32+c bits -> 32 bits */ result = (result & 0xffffffff) + (result >> 32); - return result; + return (__force __wsum)result; } EXPORT_SYMBOL(csum_partial); @@ -93,10 +92,9 @@ EXPORT_SYMBOL(csum_partial); * this routine is used for miscellaneous IP-like checksums, mainly * in icmp.c */ -unsigned short -ip_compute_csum (unsigned char * buff, int len) +__sum16 ip_compute_csum (const void *buff, int len) { - return ~do_csum(buff,len); + return (__force __sum16)~do_csum(buff,len); } EXPORT_SYMBOL(ip_compute_csum); diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c index 36866e8a5d2b..503dfe6d1450 100644 --- a/arch/ia64/lib/csum_partial_copy.c +++ b/arch/ia64/lib/csum_partial_copy.c @@ -104,9 +104,9 @@ out: */ extern unsigned long do_csum(const unsigned char *, long); -static unsigned int -do_csum_partial_copy_from_user (const unsigned char __user *src, unsigned char *dst, - int len, unsigned int psum, int *errp) +__wsum +csum_partial_copy_from_user(const void __user *src, void *dst, + int len, __wsum psum, int *errp) { unsigned long result; @@ -122,30 +122,17 @@ do_csum_partial_copy_from_user (const unsigned char __user *src, unsigned char * result = do_csum(dst, len); /* add in old sum, and carry.. 
*/ - result += psum; + result += (__force u32)psum; /* 32+c bits -> 32 bits */ result = (result & 0xffffffff) + (result >> 32); - return result; -} - -unsigned int -csum_partial_copy_from_user (const unsigned char __user *src, unsigned char *dst, - int len, unsigned int sum, int *errp) -{ - if (!access_ok(VERIFY_READ, src, len)) { - *errp = -EFAULT; - memset(dst, 0, len); - return sum; - } - - return do_csum_partial_copy_from_user(src, dst, len, sum, errp); + return (__force __wsum)result; } -unsigned int -csum_partial_copy_nocheck(const unsigned char __user *src, unsigned char *dst, - int len, unsigned int sum) +__wsum +csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum) { - return do_csum_partial_copy_from_user(src, dst, len, sum, NULL); + return csum_partial_copy_from_user((__force const void __user *)src, + dst, len, sum, NULL); } EXPORT_SYMBOL(csum_partial_copy_nocheck); diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S index 19674ca2acfc..1f86aeb2c948 100644 --- a/arch/ia64/lib/ip_fast_csum.S +++ b/arch/ia64/lib/ip_fast_csum.S @@ -8,8 +8,8 @@ * in0: address of buffer to checksum (char *) * in1: length of the buffer (int) * - * Copyright (C) 2002 Intel Corp. - * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com> + * Copyright (C) 2002, 2006 Intel Corp. 
+ * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com> */ #include <asm/asmmacro.h> @@ -25,6 +25,9 @@ #define in0 r32 #define in1 r33 +#define in2 r34 +#define in3 r35 +#define in4 r36 #define ret0 r8 GLOBAL_ENTRY(ip_fast_csum) @@ -65,8 +68,9 @@ GLOBAL_ENTRY(ip_fast_csum) zxt2 r20=r20 ;; add r20=ret0,r20 + mov r9=0xffff ;; - andcm ret0=-1,r20 + andcm ret0=r9,r20 .restore sp // reset frame state br.ret.sptk.many b0 ;; @@ -88,3 +92,51 @@ GLOBAL_ENTRY(ip_fast_csum) mov b0=r34 br.ret.sptk.many b0 END(ip_fast_csum) + +GLOBAL_ENTRY(csum_ipv6_magic) + ld4 r20=[in0],4 + ld4 r21=[in1],4 + dep r15=in3,in2,32,16 + ;; + ld4 r22=[in0],4 + ld4 r23=[in1],4 + mux1 r15=r15,@rev + ;; + ld4 r24=[in0],4 + ld4 r25=[in1],4 + shr.u r15=r15,16 + add r16=r20,r21 + add r17=r22,r23 + ;; + ld4 r26=[in0],4 + ld4 r27=[in1],4 + add r18=r24,r25 + add r8=r16,r17 + ;; + add r19=r26,r27 + add r8=r8,r18 + ;; + add r8=r8,r19 + add r15=r15,in4 + ;; + add r8=r8,r15 + ;; + shr.u r10=r8,32 // now fold sum into short + zxt4 r11=r8 + ;; + add r8=r10,r11 + ;; + shr.u r10=r8,16 // yeah, keep it rolling + zxt2 r11=r8 + ;; + add r8=r10,r11 + ;; + shr.u r10=r8,16 // three times lucky + zxt2 r11=r8 + ;; + add r8=r10,r11 + mov r9=0xffff + ;; + andcm r8=r9,r8 + br.ret.sptk.many b0 +END(csum_ipv6_magic) diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 82deaa3a7c48..1e79551231b9 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -174,6 +174,12 @@ find_memory (void) reserve_bootmem(bootmap_start, bootmap_size); find_initrd(); + +#ifdef CONFIG_CRASH_DUMP + /* If we are doing a crash dump, we still need to know the real mem + * size before original memory map is * reset. 
*/ + saved_max_pfn = max_pfn; +#endif } #ifdef CONFIG_SMP @@ -226,7 +232,6 @@ void __init paging_init (void) { unsigned long max_dma; - unsigned long nid = 0; unsigned long max_zone_pfns[MAX_NR_ZONES]; num_physpages = 0; @@ -238,7 +243,7 @@ paging_init (void) max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_VIRTUAL_MEM_MAP - efi_memmap_walk(register_active_ranges, &nid); + efi_memmap_walk(register_active_ranges, NULL); efi_memmap_walk(find_largest_hole, (u64 *)&max_gap); if (max_gap < LARGE_GAP) { vmem_map = (struct page *) 0; diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index eee5c1cfbe32..0c7e94edc20e 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -64,14 +64,21 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr) return pte; } +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + #define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; } /* * Don't actually need to do any preparation, but need to make sure * the address is in the right region. */ -int prepare_hugepage_range(unsigned long addr, unsigned long len) +int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff) { + if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT)) + return -EINVAL; if (len & ~HPAGE_MASK) return -EINVAL; if (addr & ~HPAGE_MASK) diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index ff87a5cba399..1a3d8a2feb94 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -156,7 +156,7 @@ ia64_init_addr_space (void) * the problem. When the process attempts to write to the register backing store * for the first time, it will get a SEGFAULT in this case. 
*/ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (vma) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = current->mm; @@ -175,7 +175,7 @@ ia64_init_addr_space (void) /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */ if (!(current->personality & MMAP_PAGE_ZERO)) { - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (vma) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = current->mm; @@ -595,14 +595,9 @@ find_largest_hole (u64 start, u64 end, void *arg) } int __init -register_active_ranges(u64 start, u64 end, void *nid) +register_active_ranges(u64 start, u64 end, void *arg) { - BUG_ON(nid == NULL); - BUG_ON(*(unsigned long *)nid >= MAX_NUMNODES); - - add_active_range(*(unsigned long *)nid, - __pa(start) >> PAGE_SHIFT, - __pa(end) >> PAGE_SHIFT); + add_active_range(0, __pa(start) >> PAGE_SHIFT, __pa(end) >> PAGE_SHIFT); return 0; } #endif /* CONFIG_VIRTUAL_MEM_MAP */ diff --git a/arch/ia64/pci/Makefile b/arch/ia64/pci/Makefile index e66889e6922a..fb14dc520d2d 100644 --- a/arch/ia64/pci/Makefile +++ b/arch/ia64/pci/Makefile @@ -1,4 +1,4 @@ # # Makefile for the ia64-specific parts of the pci bus # -obj-y := pci.o +obj-y := pci.o fixup.o diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c new file mode 100644 index 000000000000..245dc1fedc24 --- /dev/null +++ b/arch/ia64/pci/fixup.c @@ -0,0 +1,69 @@ +/* + * Exceptions for specific devices. Usually work-arounds for fatal design flaws. + * Derived from fixup.c of i386 tree. + */ + +#include <linux/pci.h> +#include <linux/init.h> + +#include <asm/machvec.h> + +/* + * Fixup to mark boot BIOS video selected by BIOS before it changes + * + * From information provided by "Jon Smirl" <jonsmirl@gmail.com> + * + * The standard boot ROM sequence for an x86 machine uses the BIOS + * to select an initial video card for boot display. 
This boot video + * card will have its BIOS copied to C0000 in system RAM. + * IORESOURCE_ROM_SHADOW is used to associate the boot video + * card with this copy. On laptops this copy has to be used since + * the main ROM may be compressed or combined with another image. + * See pci_map_rom() for use of this flag. IORESOURCE_ROM_SHADOW + * is marked here since the boot video device will be the only enabled + * video device at this point. + */ + +static void __devinit pci_fixup_video(struct pci_dev *pdev) +{ + struct pci_dev *bridge; + struct pci_bus *bus; + u16 config; + + if ((strcmp(platform_name, "dig") != 0) + && (strcmp(platform_name, "hpzx1") != 0)) + return; + /* Maybe, this machine supports legacy memory map. */ + + if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) + return; + + /* Is VGA routed to us? */ + bus = pdev->bus; + while (bus) { + bridge = bus->self; + + /* + * From information provided by + * "David Miller" <davem@davemloft.net> + * The bridge control register is valid for PCI header + * type BRIDGE, or CARDBUS. Host to PCI controllers use + * PCI header type NORMAL. 
+ */ + if (bridge + &&((bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE) + ||(bridge->hdr_type == PCI_HEADER_TYPE_CARDBUS))) { + pci_read_config_word(bridge, PCI_BRIDGE_CONTROL, + &config); + if (!(config & PCI_BRIDGE_CTL_VGA)) + return; + } + bus = bus->parent; + } + pci_read_config_word(pdev, PCI_COMMAND, &config); + if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { + pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW; + printk(KERN_DEBUG "Boot video device is %s\n", pci_name(pdev)); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index b30be7c48ba8..474d179966dc 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -125,11 +125,10 @@ alloc_pci_controller (int seg) { struct pci_controller *controller; - controller = kmalloc(sizeof(*controller), GFP_KERNEL); + controller = kzalloc(sizeof(*controller), GFP_KERNEL); if (!controller) return NULL; - memset(controller, 0, sizeof(*controller)); controller->segment = seg; controller->node = -1; return controller; @@ -469,10 +468,11 @@ pcibios_fixup_resources(struct pci_dev *dev, int start, int limit) } } -static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) +void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) { pcibios_fixup_resources(dev, 0, PCI_BRIDGE_RESOURCES); } +EXPORT_SYMBOL_GPL(pcibios_fixup_device_resources); static void __devinit pcibios_fixup_bridge_resources(struct pci_dev *dev) { @@ -493,6 +493,7 @@ pcibios_fixup_bus (struct pci_bus *b) } list_for_each_entry(dev, &b->devices, bus_list) pcibios_fixup_device_resources(dev); + platform_pci_fixup_bus(b); return; } @@ -562,8 +563,8 @@ pcibios_enable_device (struct pci_dev *dev, int mask) void pcibios_disable_device (struct pci_dev *dev) { - if (dev->is_enabled) - acpi_pci_irq_disable(dev); + BUG_ON(atomic_read(&dev->enable_cnt)); + acpi_pci_irq_disable(dev); } void @@ -738,75 +739,44 @@ int ia64_pci_legacy_write(struct pci_bus 
*bus, u16 port, u32 val, u8 size) return ret; } +/* It's defined in drivers/pci/pci.c */ +extern u8 pci_cache_line_size; + /** - * pci_cacheline_size - determine cacheline size for PCI devices - * @dev: void + * set_pci_cacheline_size - determine cacheline size for PCI devices * * We want to use the line-size of the outer-most cache. We assume * that this line-size is the same for all CPUs. * * Code mostly taken from arch/ia64/kernel/palinfo.c:cache_info(). - * - * RETURNS: An appropriate -ERRNO error value on eror, or zero for success. */ -static unsigned long -pci_cacheline_size (void) +static void __init set_pci_cacheline_size(void) { u64 levels, unique_caches; s64 status; pal_cache_config_info_t cci; - static u8 cacheline_size; - - if (cacheline_size) - return cacheline_size; status = ia64_pal_cache_summary(&levels, &unique_caches); if (status != 0) { - printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n", - __FUNCTION__, status); - return SMP_CACHE_BYTES; + printk(KERN_ERR "%s: ia64_pal_cache_summary() failed " + "(status=%ld)\n", __FUNCTION__, status); + return; } - status = ia64_pal_cache_config_info(levels - 1, /* cache_type (data_or_unified)= */ 2, - &cci); + status = ia64_pal_cache_config_info(levels - 1, + /* cache_type (data_or_unified)= */ 2, &cci); if (status != 0) { - printk(KERN_ERR "%s: ia64_pal_cache_config_info() failed (status=%ld)\n", - __FUNCTION__, status); - return SMP_CACHE_BYTES; + printk(KERN_ERR "%s: ia64_pal_cache_config_info() failed " + "(status=%ld)\n", __FUNCTION__, status); + return; } - cacheline_size = 1 << cci.pcci_line_size; - return cacheline_size; + pci_cache_line_size = (1 << cci.pcci_line_size) / 4; } -/** - * pcibios_prep_mwi - helper function for drivers/pci/pci.c:pci_set_mwi() - * @dev: the PCI device for which MWI is enabled - * - * For ia64, we can get the cacheline sizes from PAL. - * - * RETURNS: An appropriate -ERRNO error value on eror, or zero for success. 
- */ -int -pcibios_prep_mwi (struct pci_dev *dev) -{ - unsigned long desired_linesize, current_linesize; - int rc = 0; - u8 pci_linesize; - - desired_linesize = pci_cacheline_size(); - - pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &pci_linesize); - current_linesize = 4 * pci_linesize; - if (desired_linesize != current_linesize) { - printk(KERN_WARNING "PCI: slot %s has incorrect PCI cache line size of %lu bytes,", - pci_name(dev), current_linesize); - if (current_linesize > desired_linesize) { - printk(" expected %lu bytes instead\n", desired_linesize); - rc = -EINVAL; - } else { - printk(" correcting to %lu\n", desired_linesize); - pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, desired_linesize / 4); - } - } - return rc; +static int __init pcibios_init(void) +{ + set_pci_cacheline_size(); + return 0; } + +subsys_initcall(pcibios_init); diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile index 2d78f34dd763..0a59371d3475 100644 --- a/arch/ia64/sn/kernel/Makefile +++ b/arch/ia64/sn/kernel/Makefile @@ -4,13 +4,14 @@ # License. See the file "COPYING" in the main directory of this archive # for more details. # -# Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All Rights Reserved. +# Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All Rights Reserved. # CPPFLAGS += -I$(srctree)/arch/ia64/sn/include obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \ - huberror.o io_init.o iomv.o klconflib.o pio_phys.o \ + huberror.o io_acpi_init.o io_common.o \ + io_init.o iomv.o klconflib.o pio_phys.o \ sn2/ obj-$(CONFIG_IA64_GENERIC) += machvec.o obj-$(CONFIG_SGI_TIOCX) += tiocx.o diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index 7f73ad4408aa..ff1c55601178 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -381,14 +381,13 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode) * bcopy to the destination. 
*/ - /* Add the leader from source */ - headBteLen = len + (src & L1_CACHE_MASK); - /* Add the trailing bytes from footer. */ - headBteLen += L1_CACHE_BYTES - (headBteLen & L1_CACHE_MASK); - headBteSource = src & ~L1_CACHE_MASK; headBcopySrcOffset = src & L1_CACHE_MASK; headBcopyDest = dest; headBcopyLen = len; + + headBteSource = src - headBcopySrcOffset; + /* Add the leading and trailing bytes from source */ + headBteLen = L1_CACHE_ALIGN(len + headBcopySrcOffset); } if (headBcopyLen > 0) { diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c new file mode 100644 index 000000000000..99d7f278612a --- /dev/null +++ b/arch/ia64/sn/kernel/io_acpi_init.c @@ -0,0 +1,231 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include <asm/sn/types.h> +#include <asm/sn/addrs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/sn_sal.h> +#include "xtalk/hubdev.h" +#include <linux/acpi.h> + + +/* + * The code in this file will only be executed when running with + * a PROM that has ACPI IO support. (i.e., SN_ACPI_BASE_SUPPORT() == 1) + */ + + +/* + * This value must match the UUID the PROM uses + * (io/acpi/defblk.c) when building a vendor descriptor. + */ +struct acpi_vendor_uuid sn_uuid = { + .subtype = 0, + .data = { 0x2c, 0xc6, 0xa6, 0xfe, 0x9c, 0x44, 0xda, 0x11, + 0xa2, 0x7c, 0x08, 0x00, 0x69, 0x13, 0xea, 0x51 }, +}; + +/* + * Perform the early IO init in PROM. + */ +static s64 +sal_ioif_init(u64 *result) +{ + struct ia64_sal_retval isrv = {0,0,0,0}; + + SAL_CALL_NOLOCK(isrv, + SN_SAL_IOIF_INIT, 0, 0, 0, 0, 0, 0, 0); + *result = isrv.v0; + return isrv.status; +} + +/* + * sn_hubdev_add - The 'add' function of the acpi_sn_hubdev_driver. 
+ * Called for every "SGIHUB" or "SGITIO" device defined + * in the ACPI namespace. + */ +static int __init +sn_hubdev_add(struct acpi_device *device) +{ + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + u64 addr; + struct hubdev_info *hubdev; + struct hubdev_info *hubdev_ptr; + int i; + u64 nasid; + struct acpi_resource *resource; + int ret = 0; + acpi_status status; + struct acpi_resource_vendor_typed *vendor; + extern void sn_common_hubdev_init(struct hubdev_info *); + + status = acpi_get_vendor_resource(device->handle, METHOD_NAME__CRS, + &sn_uuid, &buffer); + if (ACPI_FAILURE(status)) { + printk(KERN_ERR + "sn_hubdev_add: acpi_get_vendor_resource() failed: %d\n", + status); + return 1; + } + + resource = buffer.pointer; + vendor = &resource->data.vendor_typed; + if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) != + sizeof(struct hubdev_info *)) { + printk(KERN_ERR + "sn_hubdev_add: Invalid vendor data length: %d\n", + vendor->byte_length); + ret = 1; + goto exit; + } + + memcpy(&addr, vendor->byte_data, sizeof(struct hubdev_info *)); + hubdev_ptr = __va((struct hubdev_info *) addr); + + nasid = hubdev_ptr->hdi_nasid; + i = nasid_to_cnodeid(nasid); + hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo); + *hubdev = *hubdev_ptr; + sn_common_hubdev_init(hubdev); + +exit: + kfree(buffer.pointer); + return ret; +} + +/* + * sn_get_bussoft_ptr() - The pcibus_bussoft pointer is found in + * the ACPI Vendor resource for this bus. 
+ */ +static struct pcibus_bussoft * +sn_get_bussoft_ptr(struct pci_bus *bus) +{ + u64 addr; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + acpi_handle handle; + struct pcibus_bussoft *prom_bussoft_ptr; + struct acpi_resource *resource; + acpi_status status; + struct acpi_resource_vendor_typed *vendor; + + + handle = PCI_CONTROLLER(bus)->acpi_handle; + status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS, + &sn_uuid, &buffer); + if (ACPI_FAILURE(status)) { + printk(KERN_ERR "get_acpi_pcibus_ptr: " + "get_acpi_bussoft_info() failed: %d\n", + status); + return NULL; + } + resource = buffer.pointer; + vendor = &resource->data.vendor_typed; + + if ((vendor->byte_length - sizeof(struct acpi_vendor_uuid)) != + sizeof(struct pcibus_bussoft *)) { + printk(KERN_ERR + "get_acpi_bussoft_ptr: Invalid vendor data " + "length %d\n", vendor->byte_length); + kfree(buffer.pointer); + return NULL; + } + memcpy(&addr, vendor->byte_data, sizeof(struct pcibus_bussoft *)); + prom_bussoft_ptr = __va((struct pcibus_bussoft *) addr); + kfree(buffer.pointer); + + return prom_bussoft_ptr; +} + +/* + * sn_acpi_bus_fixup + */ +void +sn_acpi_bus_fixup(struct pci_bus *bus) +{ + struct pci_dev *pci_dev = NULL; + struct pcibus_bussoft *prom_bussoft_ptr; + extern void sn_common_bus_fixup(struct pci_bus *, + struct pcibus_bussoft *); + + if (!bus->parent) { /* If root bus */ + prom_bussoft_ptr = sn_get_bussoft_ptr(bus); + if (prom_bussoft_ptr == NULL) { + printk(KERN_ERR + "sn_pci_fixup_bus: 0x%04x:0x%02x Unable to " + "obtain prom_bussoft_ptr\n", + pci_domain_nr(bus), bus->number); + return; + } + sn_common_bus_fixup(bus, prom_bussoft_ptr); + } + list_for_each_entry(pci_dev, &bus->devices, bus_list) { + sn_pci_fixup_slot(pci_dev); + } +} + +/* + * sn_acpi_slot_fixup - Perform any SN specific slot fixup. 
+ * At present there does not appear to be + * any generic way to handle a ROM image + * that has been shadowed by the PROM, so + * we pass a pointer to it within the + * pcidev_info structure. + */ + +void +sn_acpi_slot_fixup(struct pci_dev *dev, struct pcidev_info *pcidev_info) +{ + void __iomem *addr; + size_t size; + + if (pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE]) { + /* + * A valid ROM image exists and has been shadowed by the + * PROM. Setup the pci_dev ROM resource to point to + * the shadowed copy. + */ + size = dev->resource[PCI_ROM_RESOURCE].end - + dev->resource[PCI_ROM_RESOURCE].start; + addr = + ioremap(pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE], + size); + dev->resource[PCI_ROM_RESOURCE].start = (unsigned long) addr; + dev->resource[PCI_ROM_RESOURCE].end = + (unsigned long) addr + size; + dev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_BIOS_COPY; + } +} + +static struct acpi_driver acpi_sn_hubdev_driver = { + .name = "SGI HUBDEV Driver", + .ids = "SGIHUB,SGITIO", + .ops = { + .add = sn_hubdev_add, + }, +}; + + +/* + * sn_io_acpi_init - PROM has ACPI support for IO, defining at a minimum the + * nodes and root buses in the DSDT. As a result, bus scanning + * will be initiated by the Linux ACPI code. + */ + +void __init +sn_io_acpi_init(void) +{ + u64 result; + s64 status; + + acpi_bus_register_driver(&acpi_sn_hubdev_driver); + status = sal_ioif_init(&result); + if (status || result) + panic("sal_ioif_init failed: [%lx] %s\n", + status, ia64_sal_strerror(status)); +} diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c new file mode 100644 index 000000000000..d4dd8f4b6b8d --- /dev/null +++ b/arch/ia64/sn/kernel/io_common.c @@ -0,0 +1,613 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved. 
+ */ + +#include <linux/bootmem.h> +#include <asm/sn/types.h> +#include <asm/sn/addrs.h> +#include <asm/sn/sn_feature_sets.h> +#include <asm/sn/geo.h> +#include <asm/sn/io.h> +#include <asm/sn/l1.h> +#include <asm/sn/module.h> +#include <asm/sn/pcibr_provider.h> +#include <asm/sn/pcibus_provider_defs.h> +#include <asm/sn/pcidev.h> +#include <asm/sn/simulator.h> +#include <asm/sn/sn_sal.h> +#include <asm/sn/tioca_provider.h> +#include <asm/sn/tioce_provider.h> +#include "xtalk/hubdev.h" +#include "xtalk/xwidgetdev.h" +#include <linux/acpi.h> +#include <asm/sn/sn2/sn_hwperf.h> +#include <asm/sn/acpi.h> + +extern void sn_init_cpei_timer(void); +extern void register_sn_procfs(void); +extern void sn_acpi_bus_fixup(struct pci_bus *); +extern void sn_bus_fixup(struct pci_bus *); +extern void sn_acpi_slot_fixup(struct pci_dev *, struct pcidev_info *); +extern void sn_more_slot_fixup(struct pci_dev *, struct pcidev_info *); +extern void sn_legacy_pci_window_fixup(struct pci_controller *, u64, u64); +extern void sn_io_acpi_init(void); +extern void sn_io_init(void); + + +static struct list_head sn_sysdata_list; + +/* sysdata list struct */ +struct sysdata_el { + struct list_head entry; + void *sysdata; +}; + +int sn_ioif_inited; /* SN I/O infrastructure initialized? 
*/ + +struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES]; /* indexed by asic type */ + +/* + * Hooks and struct for unsupported pci providers + */ + +static dma_addr_t +sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size, int type) +{ + return 0; +} + +static void +sn_default_pci_unmap(struct pci_dev *pdev, dma_addr_t addr, int direction) +{ + return; +} + +static void * +sn_default_pci_bus_fixup(struct pcibus_bussoft *soft, struct pci_controller *controller) +{ + return NULL; +} + +static struct sn_pcibus_provider sn_pci_default_provider = { + .dma_map = sn_default_pci_map, + .dma_map_consistent = sn_default_pci_map, + .dma_unmap = sn_default_pci_unmap, + .bus_fixup = sn_default_pci_bus_fixup, +}; + +/* + * Retrieve the DMA Flush List given nasid, widget, and device. + * This list is needed to implement the WAR - Flush DMA data on PIO Reads. + */ +static inline u64 +sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num, + u64 address) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST, + (u64) nasid, (u64) widget_num, + (u64) device_num, (u64) address, 0, 0, 0); + return ret_stuff.status; +} + +/* + * Retrieve the pci device information given the bus and device|function number. + */ +static inline u64 +sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev, + u64 sn_irq_info) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, + (u64) SN_SAL_IOIF_GET_PCIDEV_INFO, + (u64) segment, (u64) bus_number, (u64) devfn, + (u64) pci_dev, + sn_irq_info, 0, 0); + return ret_stuff.v0; +} + +/* + * sn_pcidev_info_get() - Retrieve the pcidev_info struct for the specified + * device. 
+ */ +inline struct pcidev_info * +sn_pcidev_info_get(struct pci_dev *dev) +{ + struct pcidev_info *pcidev; + + list_for_each_entry(pcidev, + &(SN_PLATFORM_DATA(dev)->pcidev_info), pdi_list) { + if (pcidev->pdi_linux_pcidev == dev) + return pcidev; + } + return NULL; +} + +/* Older PROM flush WAR + * + * 01/16/06 -- This war will be in place until a new official PROM is released. + * Additionally note that the struct sn_flush_device_war also has to be + * removed from arch/ia64/sn/include/xtalk/hubdev.h + */ +static u8 war_implemented = 0; + +static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device, + struct sn_flush_device_common *common) +{ + struct sn_flush_device_war *war_list; + struct sn_flush_device_war *dev_entry; + struct ia64_sal_retval isrv = {0,0,0,0}; + + if (!war_implemented) { + printk(KERN_WARNING "PROM version < 4.50 -- implementing old " + "PROM flush WAR\n"); + war_implemented = 1; + } + + war_list = kzalloc(DEV_PER_WIDGET * sizeof(*war_list), GFP_KERNEL); + if (!war_list) + BUG(); + + SAL_CALL_NOLOCK(isrv, SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST, + nasid, widget, __pa(war_list), 0, 0, 0 ,0); + if (isrv.status) + panic("sn_device_fixup_war failed: %s\n", + ia64_sal_strerror(isrv.status)); + + dev_entry = war_list + device; + memcpy(common,dev_entry, sizeof(*common)); + kfree(war_list); + + return isrv.status; +} + +/* + * sn_common_hubdev_init() - This routine is called to initialize the HUB data + * structure for each node in the system. 
+ */ +void __init +sn_common_hubdev_init(struct hubdev_info *hubdev) +{ + + struct sn_flush_device_kernel *sn_flush_device_kernel; + struct sn_flush_device_kernel *dev_entry; + s64 status; + int widget, device, size; + + /* Attach the error interrupt handlers */ + if (hubdev->hdi_nasid & 1) /* If TIO */ + ice_error_init(hubdev); + else + hub_error_init(hubdev); + + for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) + hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev; + + if (!hubdev->hdi_flush_nasid_list.widget_p) + return; + + size = (HUB_WIDGET_ID_MAX + 1) * + sizeof(struct sn_flush_device_kernel *); + hubdev->hdi_flush_nasid_list.widget_p = + kzalloc(size, GFP_KERNEL); + if (!hubdev->hdi_flush_nasid_list.widget_p) + BUG(); + + for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) { + size = DEV_PER_WIDGET * + sizeof(struct sn_flush_device_kernel); + sn_flush_device_kernel = kzalloc(size, GFP_KERNEL); + if (!sn_flush_device_kernel) + BUG(); + + dev_entry = sn_flush_device_kernel; + for (device = 0; device < DEV_PER_WIDGET; + device++, dev_entry++) { + size = sizeof(struct sn_flush_device_common); + dev_entry->common = kzalloc(size, GFP_KERNEL); + if (!dev_entry->common) + BUG(); + if (sn_prom_feature_available(PRF_DEVICE_FLUSH_LIST)) + status = sal_get_device_dmaflush_list( + hubdev->hdi_nasid, widget, device, + (u64)(dev_entry->common)); + else + status = sn_device_fixup_war(hubdev->hdi_nasid, + widget, device, + dev_entry->common); + if (status != SALRET_OK) + panic("SAL call failed: %s\n", + ia64_sal_strerror(status)); + + spin_lock_init(&dev_entry->sfdl_flush_lock); + } + + if (sn_flush_device_kernel) + hubdev->hdi_flush_nasid_list.widget_p[widget] = + sn_flush_device_kernel; + } +} + +void sn_pci_unfixup_slot(struct pci_dev *dev) +{ + struct pci_dev *host_pci_dev = SN_PCIDEV_INFO(dev)->host_pci_dev; + + sn_irq_unfixup(dev); + pci_dev_put(host_pci_dev); + pci_dev_put(dev); +} + +/* + * sn_pci_fixup_slot() - This routine sets up a slot's resources 
consistent + * with the Linux PCI abstraction layer. Resources + * acquired from our PCI provider include PIO maps + * to BAR space and interrupt objects. + */ +void sn_pci_fixup_slot(struct pci_dev *dev) +{ + int segment = pci_domain_nr(dev->bus); + int status = 0; + struct pcibus_bussoft *bs; + struct pci_bus *host_pci_bus; + struct pci_dev *host_pci_dev; + struct pcidev_info *pcidev_info; + struct sn_irq_info *sn_irq_info; + unsigned int bus_no, devfn; + + pci_dev_get(dev); /* for the sysdata pointer */ + pcidev_info = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL); + if (!pcidev_info) + BUG(); /* Cannot afford to run out of memory */ + + sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL); + if (!sn_irq_info) + BUG(); /* Cannot afford to run out of memory */ + + /* Call to retrieve pci device information needed by kernel. */ + status = sal_get_pcidev_info((u64) segment, (u64) dev->bus->number, + dev->devfn, + (u64) __pa(pcidev_info), + (u64) __pa(sn_irq_info)); + if (status) + BUG(); /* Cannot get platform pci device information */ + + /* Add pcidev_info to list in pci_controller.platform_data */ + list_add_tail(&pcidev_info->pdi_list, + &(SN_PLATFORM_DATA(dev->bus)->pcidev_info)); + + if (SN_ACPI_BASE_SUPPORT()) + sn_acpi_slot_fixup(dev, pcidev_info); + else + sn_more_slot_fixup(dev, pcidev_info); + /* + * Using the PROMs values for the PCI host bus, get the Linux + * PCI host_pci_dev struct and set up host bus linkages + */ + + bus_no = (pcidev_info->pdi_slot_host_handle >> 32) & 0xff; + devfn = pcidev_info->pdi_slot_host_handle & 0xffffffff; + host_pci_bus = pci_find_bus(segment, bus_no); + host_pci_dev = pci_get_slot(host_pci_bus, devfn); + + pcidev_info->host_pci_dev = host_pci_dev; + pcidev_info->pdi_linux_pcidev = dev; + pcidev_info->pdi_host_pcidev_info = SN_PCIDEV_INFO(host_pci_dev); + bs = SN_PCIBUS_BUSSOFT(dev->bus); + pcidev_info->pdi_pcibus_info = bs; + + if (bs && bs->bs_asic_type < PCIIO_ASIC_MAX_TYPES) { + SN_PCIDEV_BUSPROVIDER(dev) = 
sn_pci_provider[bs->bs_asic_type]; + } else { + SN_PCIDEV_BUSPROVIDER(dev) = &sn_pci_default_provider; + } + + /* Only set up IRQ stuff if this device has a host bus context */ + if (bs && sn_irq_info->irq_irq) { + pcidev_info->pdi_sn_irq_info = sn_irq_info; + dev->irq = pcidev_info->pdi_sn_irq_info->irq_irq; + sn_irq_fixup(dev, sn_irq_info); + } else { + pcidev_info->pdi_sn_irq_info = NULL; + kfree(sn_irq_info); + } +} + +/* + * sn_common_bus_fixup - Perform platform specific bus fixup. + * Execute the ASIC specific fixup routine + * for this bus. + */ +void +sn_common_bus_fixup(struct pci_bus *bus, + struct pcibus_bussoft *prom_bussoft_ptr) +{ + int cnode; + struct pci_controller *controller; + struct hubdev_info *hubdev_info; + int nasid; + void *provider_soft; + struct sn_pcibus_provider *provider; + struct sn_platform_data *sn_platform_data; + + controller = PCI_CONTROLLER(bus); + /* + * Per-provider fixup. Copies the bus soft structure from prom + * to local area and links SN_PCIBUS_BUSSOFT(). + */ + + if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES) { + printk(KERN_WARNING "sn_common_bus_fixup: Unsupported asic type, %d", + prom_bussoft_ptr->bs_asic_type); + return; + } + + if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB) + return; /* no further fixup necessary */ + + provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type]; + if (provider == NULL) + panic("sn_common_bus_fixup: No provider registered for this asic type, %d", + prom_bussoft_ptr->bs_asic_type); + + if (provider->bus_fixup) + provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr, + controller); + else + provider_soft = NULL; + + /* + * Generic bus fixup goes here. Don't reference prom_bussoft_ptr + * after this point. 
+ */ + controller->platform_data = kzalloc(sizeof(struct sn_platform_data), + GFP_KERNEL); + if (controller->platform_data == NULL) + BUG(); + sn_platform_data = + (struct sn_platform_data *) controller->platform_data; + sn_platform_data->provider_soft = provider_soft; + INIT_LIST_HEAD(&((struct sn_platform_data *) + controller->platform_data)->pcidev_info); + nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base); + cnode = nasid_to_cnodeid(nasid); + hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); + SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info = + &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]); + + /* + * If the node information we obtained during the fixup phase is + * invalid then set controller->node to -1 (undetermined) + */ + if (controller->node >= num_online_nodes()) { + struct pcibus_bussoft *b = SN_PCIBUS_BUSSOFT(bus); + + printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%u" + "L_IO=%lx L_MEM=%lx BASE=%lx\n", + b->bs_asic_type, b->bs_xid, b->bs_persist_busnum, + b->bs_legacy_io, b->bs_legacy_mem, b->bs_base); + printk(KERN_WARNING "on node %d but only %d nodes online." 
+ "Association set to undetermined.\n", + controller->node, num_online_nodes()); + controller->node = -1; + } +} + +void sn_bus_store_sysdata(struct pci_dev *dev) +{ + struct sysdata_el *element; + + element = kzalloc(sizeof(struct sysdata_el), GFP_KERNEL); + if (!element) { + dev_dbg(dev, "%s: out of memory!\n", __FUNCTION__); + return; + } + element->sysdata = SN_PCIDEV_INFO(dev); + list_add(&element->entry, &sn_sysdata_list); +} + +void sn_bus_free_sysdata(void) +{ + struct sysdata_el *element; + struct list_head *list, *safe; + + list_for_each_safe(list, safe, &sn_sysdata_list) { + element = list_entry(list, struct sysdata_el, entry); + list_del(&element->entry); + list_del(&(((struct pcidev_info *) + (element->sysdata))->pdi_list)); + kfree(element->sysdata); + kfree(element); + } + return; +} + +/* + * hubdev_init_node() - Creates the HUB data structure and link them to it's + * own NODE specific data area. + */ +void hubdev_init_node(nodepda_t * npda, cnodeid_t node) +{ + struct hubdev_info *hubdev_info; + int size; + pg_data_t *pg; + + size = sizeof(struct hubdev_info); + + if (node >= num_online_nodes()) /* Headless/memless IO nodes */ + pg = NODE_DATA(0); + else + pg = NODE_DATA(node); + + hubdev_info = (struct hubdev_info *)alloc_bootmem_node(pg, size); + + npda->pdinfo = (void *)hubdev_info; +} + +geoid_t +cnodeid_get_geoid(cnodeid_t cnode) +{ + struct hubdev_info *hubdev; + + hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); + return hubdev->hdi_geoid; +} + +void sn_generate_path(struct pci_bus *pci_bus, char *address) +{ + nasid_t nasid; + cnodeid_t cnode; + geoid_t geoid; + moduleid_t moduleid; + u16 bricktype; + + nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base); + cnode = nasid_to_cnodeid(nasid); + geoid = cnodeid_get_geoid(cnode); + moduleid = geo_module(geoid); + + sprintf(address, "module_%c%c%c%c%.2d", + '0'+RACK_GET_CLASS(MODULE_GET_RACK(moduleid)), + '0'+RACK_GET_GROUP(MODULE_GET_RACK(moduleid)), + 
'0'+RACK_GET_NUM(MODULE_GET_RACK(moduleid)), + MODULE_GET_BTCHAR(moduleid), MODULE_GET_BPOS(moduleid)); + + /* Tollhouse requires slot id to be displayed */ + bricktype = MODULE_GET_BTYPE(moduleid); + if ((bricktype == L1_BRICKTYPE_191010) || + (bricktype == L1_BRICKTYPE_1932)) + sprintf(address, "%s^%d", address, geo_slot(geoid)); +} + +/* + * sn_pci_fixup_bus() - Perform SN specific setup of software structs + * (pcibus_bussoft, pcidev_info) and hardware + * registers, for the specified bus and devices under it. + */ +void __devinit +sn_pci_fixup_bus(struct pci_bus *bus) +{ + + if (SN_ACPI_BASE_SUPPORT()) + sn_acpi_bus_fixup(bus); + else + sn_bus_fixup(bus); +} + +/* + * sn_io_early_init - Perform early IO (and some non-IO) initialization. + * In particular, setup the sn_pci_provider[] array. + * This needs to be done prior to any bus scanning + * (acpi_scan_init()) in the ACPI case, as the SN + * bus fixup code will reference the array. + */ +static int __init +sn_io_early_init(void) +{ + int i; + + if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM()) + return 0; + + /* + * prime sn_pci_provider[]. Individial provider init routines will + * override their respective default entries. + */ + + for (i = 0; i < PCIIO_ASIC_MAX_TYPES; i++) + sn_pci_provider[i] = &sn_pci_default_provider; + + pcibr_init_provider(); + tioca_init_provider(); + tioce_init_provider(); + + /* + * This is needed to avoid bounce limit checks in the blk layer + */ + ia64_max_iommu_merge_mask = ~PAGE_MASK; + + sn_irq_lh_init(); + INIT_LIST_HEAD(&sn_sysdata_list); + sn_init_cpei_timer(); + +#ifdef CONFIG_PROC_FS + register_sn_procfs(); +#endif + + printk(KERN_INFO "ACPI DSDT OEM Rev 0x%x\n", + acpi_gbl_DSDT->oem_revision); + if (SN_ACPI_BASE_SUPPORT()) + sn_io_acpi_init(); + else + sn_io_init(); + return 0; +} + +arch_initcall(sn_io_early_init); + +/* + * sn_io_late_init() - Perform any final platform specific IO initialization. 
+ */ + +int __init +sn_io_late_init(void) +{ + struct pci_bus *bus; + struct pcibus_bussoft *bussoft; + cnodeid_t cnode; + nasid_t nasid; + cnodeid_t near_cnode; + + if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM()) + return 0; + + /* + * Setup closest node in pci_controller->node for + * PIC, TIOCP, TIOCE (TIOCA does it during bus fixup using + * info from the PROM). + */ + bus = NULL; + while ((bus = pci_find_next_bus(bus)) != NULL) { + bussoft = SN_PCIBUS_BUSSOFT(bus); + nasid = NASID_GET(bussoft->bs_base); + cnode = nasid_to_cnodeid(nasid); + if ((bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCP) || + (bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCE)) { + /* TIO PCI Bridge: find nearest node with CPUs */ + int e = sn_hwperf_get_nearest_node(cnode, NULL, + &near_cnode); + if (e < 0) { + near_cnode = (cnodeid_t)-1; /* use any node */ + printk(KERN_WARNING "pcibr_bus_fixup: failed " + "to find near node with CPUs to TIO " + "node %d, err=%d\n", cnode, e); + } + PCI_CONTROLLER(bus)->node = near_cnode; + } else if (bussoft->bs_asic_type == PCIIO_ASIC_TYPE_PIC) { + PCI_CONTROLLER(bus)->node = cnode; + } + } + + sn_ioif_inited = 1; /* SN I/O infrastructure now initialized */ + + return 0; +} + +fs_initcall(sn_io_late_init); + +EXPORT_SYMBOL(sn_pci_fixup_slot); +EXPORT_SYMBOL(sn_pci_unfixup_slot); +EXPORT_SYMBOL(sn_bus_store_sysdata); +EXPORT_SYMBOL(sn_bus_free_sysdata); +EXPORT_SYMBOL(sn_generate_path); + diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index dc09a6a28a37..9ad843e0383b 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -3,103 +3,28 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 1992 - 1997, 2000-2006 Silicon Graphics, Inc. All rights reserved. 
*/ -#include <linux/bootmem.h> -#include <linux/nodemask.h> #include <asm/sn/types.h> #include <asm/sn/addrs.h> -#include <asm/sn/sn_feature_sets.h> -#include <asm/sn/geo.h> #include <asm/sn/io.h> -#include <asm/sn/l1.h> #include <asm/sn/module.h> -#include <asm/sn/pcibr_provider.h> +#include <asm/sn/intr.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/pcidev.h> -#include <asm/sn/simulator.h> #include <asm/sn/sn_sal.h> -#include <asm/sn/tioca_provider.h> -#include <asm/sn/tioce_provider.h> #include "xtalk/hubdev.h" -#include "xtalk/xwidgetdev.h" - - -extern void sn_init_cpei_timer(void); -extern void register_sn_procfs(void); - -static struct list_head sn_sysdata_list; - -/* sysdata list struct */ -struct sysdata_el { - struct list_head entry; - void *sysdata; -}; - -struct slab_info { - struct hubdev_info hubdev; -}; - -struct brick { - moduleid_t id; /* Module ID of this module */ - struct slab_info slab_info[MAX_SLABS + 1]; -}; - -int sn_ioif_inited; /* SN I/O infrastructure initialized? */ - -struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES]; /* indexed by asic type */ - -static int max_segment_number; /* Default highest segment number */ -static int max_pcibus_number = 255; /* Default highest pci bus number */ /* - * Hooks and struct for unsupported pci providers + * The code in this file will only be executed when running with + * a PROM that does _not_ have base ACPI IO support. 
+ * (i.e., SN_ACPI_BASE_SUPPORT() == 0) */ -static dma_addr_t -sn_default_pci_map(struct pci_dev *pdev, unsigned long paddr, size_t size, int type) -{ - return 0; -} - -static void -sn_default_pci_unmap(struct pci_dev *pdev, dma_addr_t addr, int direction) -{ - return; -} - -static void * -sn_default_pci_bus_fixup(struct pcibus_bussoft *soft, struct pci_controller *controller) -{ - return NULL; -} - -static struct sn_pcibus_provider sn_pci_default_provider = { - .dma_map = sn_default_pci_map, - .dma_map_consistent = sn_default_pci_map, - .dma_unmap = sn_default_pci_unmap, - .bus_fixup = sn_default_pci_bus_fixup, -}; - -/* - * Retrieve the DMA Flush List given nasid, widget, and device. - * This list is needed to implement the WAR - Flush DMA data on PIO Reads. - */ -static inline u64 -sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num, - u64 address) -{ - struct ia64_sal_retval ret_stuff; - ret_stuff.status = 0; - ret_stuff.v0 = 0; +static int max_segment_number; /* Default highest segment number */ +static int max_pcibus_number = 255; /* Default highest pci bus number */ - SAL_CALL_NOLOCK(ret_stuff, - (u64) SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST, - (u64) nasid, (u64) widget_num, - (u64) device_num, (u64) address, 0, 0, 0); - return ret_stuff.status; -} /* * Retrieve the hub device info structure for the given nasid. @@ -131,93 +56,20 @@ static inline u64 sal_get_pcibus_info(u64 segment, u64 busnum, u64 address) return ret_stuff.v0; } -/* - * Retrieve the pci device information given the bus and device|function number. 
- */ -static inline u64 -sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev, - u64 sn_irq_info) -{ - struct ia64_sal_retval ret_stuff; - ret_stuff.status = 0; - ret_stuff.v0 = 0; - - SAL_CALL_NOLOCK(ret_stuff, - (u64) SN_SAL_IOIF_GET_PCIDEV_INFO, - (u64) segment, (u64) bus_number, (u64) devfn, - (u64) pci_dev, - sn_irq_info, 0, 0); - return ret_stuff.v0; -} - -/* - * sn_pcidev_info_get() - Retrieve the pcidev_info struct for the specified - * device. - */ -inline struct pcidev_info * -sn_pcidev_info_get(struct pci_dev *dev) -{ - struct pcidev_info *pcidev; - - list_for_each_entry(pcidev, - &(SN_PCI_CONTROLLER(dev)->pcidev_info), pdi_list) { - if (pcidev->pdi_linux_pcidev == dev) { - return pcidev; - } - } - return NULL; -} - -/* Older PROM flush WAR - * - * 01/16/06 -- This war will be in place until a new official PROM is released. - * Additionally note that the struct sn_flush_device_war also has to be - * removed from arch/ia64/sn/include/xtalk/hubdev.h - */ -static u8 war_implemented = 0; - -static s64 sn_device_fixup_war(u64 nasid, u64 widget, int device, - struct sn_flush_device_common *common) -{ - struct sn_flush_device_war *war_list; - struct sn_flush_device_war *dev_entry; - struct ia64_sal_retval isrv = {0,0,0,0}; - - if (!war_implemented) { - printk(KERN_WARNING "PROM version < 4.50 -- implementing old " - "PROM flush WAR\n"); - war_implemented = 1; - } - - war_list = kzalloc(DEV_PER_WIDGET * sizeof(*war_list), GFP_KERNEL); - if (!war_list) - BUG(); - - SAL_CALL_NOLOCK(isrv, SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST, - nasid, widget, __pa(war_list), 0, 0, 0 ,0); - if (isrv.status) - panic("sn_device_fixup_war failed: %s\n", - ia64_sal_strerror(isrv.status)); - - dev_entry = war_list + device; - memcpy(common,dev_entry, sizeof(*common)); - kfree(war_list); - - return isrv.status; -} /* - * sn_fixup_ionodes() - This routine initializes the HUB data strcuture for - * each node in the system. 
+ * sn_fixup_ionodes() - This routine initializes the HUB data structure for + * each node in the system. This function is only + * executed when running with a non-ACPI capable PROM. */ static void __init sn_fixup_ionodes(void) { - struct sn_flush_device_kernel *sn_flush_device_kernel; - struct sn_flush_device_kernel *dev_entry; + struct hubdev_info *hubdev; u64 status; u64 nasid; - int i, widget, device, size; + int i; + extern void sn_common_hubdev_init(struct hubdev_info *); /* * Get SGI Specific HUB chipset information. @@ -240,70 +92,47 @@ static void __init sn_fixup_ionodes(void) max_segment_number = hubdev->max_segment_number; max_pcibus_number = hubdev->max_pcibus_number; } + sn_common_hubdev_init(hubdev); + } +} - /* Attach the error interrupt handlers */ - if (nasid & 1) - ice_error_init(hubdev); - else - hub_error_init(hubdev); - - for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) - hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev; - - if (!hubdev->hdi_flush_nasid_list.widget_p) - continue; - - size = (HUB_WIDGET_ID_MAX + 1) * - sizeof(struct sn_flush_device_kernel *); - hubdev->hdi_flush_nasid_list.widget_p = - kzalloc(size, GFP_KERNEL); - if (!hubdev->hdi_flush_nasid_list.widget_p) +/* + * sn_pci_legacy_window_fixup - Create PCI controller windows for + * legacy IO and MEM space. 
This needs to + * be done here, as the PROM does not have + * ACPI support defining the root buses + * and their resources (_CRS), + */ +static void +sn_legacy_pci_window_fixup(struct pci_controller *controller, + u64 legacy_io, u64 legacy_mem) +{ + controller->window = kcalloc(2, sizeof(struct pci_window), + GFP_KERNEL); + if (controller->window == NULL) BUG(); - - for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) { - size = DEV_PER_WIDGET * - sizeof(struct sn_flush_device_kernel); - sn_flush_device_kernel = kzalloc(size, GFP_KERNEL); - if (!sn_flush_device_kernel) - BUG(); - - dev_entry = sn_flush_device_kernel; - for (device = 0; device < DEV_PER_WIDGET; - device++,dev_entry++) { - size = sizeof(struct sn_flush_device_common); - dev_entry->common = kzalloc(size, GFP_KERNEL); - if (!dev_entry->common) - BUG(); - - if (sn_prom_feature_available( - PRF_DEVICE_FLUSH_LIST)) - status = sal_get_device_dmaflush_list( - nasid, widget, device, - (u64)(dev_entry->common)); - else - status = sn_device_fixup_war(nasid, - widget, device, - dev_entry->common); - if (status != SALRET_OK) - panic("SAL call failed: %s\n", - ia64_sal_strerror(status)); - - spin_lock_init(&dev_entry->sfdl_flush_lock); - } - - if (sn_flush_device_kernel) - hubdev->hdi_flush_nasid_list.widget_p[widget] = - sn_flush_device_kernel; - } - } + controller->window[0].offset = legacy_io; + controller->window[0].resource.name = "legacy_io"; + controller->window[0].resource.flags = IORESOURCE_IO; + controller->window[0].resource.start = legacy_io; + controller->window[0].resource.end = + controller->window[0].resource.start + 0xffff; + controller->window[0].resource.parent = &ioport_resource; + controller->window[1].offset = legacy_mem; + controller->window[1].resource.name = "legacy_mem"; + controller->window[1].resource.flags = IORESOURCE_MEM; + controller->window[1].resource.start = legacy_mem; + controller->window[1].resource.end = + controller->window[1].resource.start + (1024 * 1024) - 1; + 
controller->window[1].resource.parent = &iomem_resource; + controller->windows = 2; } /* * sn_pci_window_fixup() - Create a pci_window for each device resource. - * Until ACPI support is added, we need this code - * to setup pci_windows for use by - * pcibios_bus_to_resource(), - * pcibios_resource_to_bus(), etc. + * It will setup pci_windows for use by + * pcibios_bus_to_resource(), pcibios_resource_to_bus(), + * etc. */ static void sn_pci_window_fixup(struct pci_dev *dev, unsigned int count, @@ -342,60 +171,22 @@ sn_pci_window_fixup(struct pci_dev *dev, unsigned int count, controller->window = new_window; } -void sn_pci_unfixup_slot(struct pci_dev *dev) -{ - struct pci_dev *host_pci_dev = SN_PCIDEV_INFO(dev)->host_pci_dev; - - sn_irq_unfixup(dev); - pci_dev_put(host_pci_dev); - pci_dev_put(dev); -} - /* - * sn_pci_fixup_slot() - This routine sets up a slot's resources - * consistent with the Linux PCI abstraction layer. Resources acquired - * from our PCI provider include PIO maps to BAR space and interrupt - * objects. + * sn_more_slot_fixup() - We are not running with an ACPI capable PROM, + * and need to convert the pci_dev->resource + * 'start' and 'end' addresses to mapped addresses, + * and setup the pci_controller->window array entries. 
*/ -void sn_pci_fixup_slot(struct pci_dev *dev) +void +sn_more_slot_fixup(struct pci_dev *dev, struct pcidev_info *pcidev_info) { unsigned int count = 0; int idx; - int segment = pci_domain_nr(dev->bus); - int status = 0; - struct pcibus_bussoft *bs; - struct pci_bus *host_pci_bus; - struct pci_dev *host_pci_dev; - struct pcidev_info *pcidev_info; s64 pci_addrs[PCI_ROM_RESOURCE + 1]; - struct sn_irq_info *sn_irq_info; - unsigned long size; - unsigned int bus_no, devfn; - - pci_dev_get(dev); /* for the sysdata pointer */ - pcidev_info = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL); - if (!pcidev_info) - BUG(); /* Cannot afford to run out of memory */ - - sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL); - if (!sn_irq_info) - BUG(); /* Cannot afford to run out of memory */ - - /* Call to retrieve pci device information needed by kernel. */ - status = sal_get_pcidev_info((u64) segment, (u64) dev->bus->number, - dev->devfn, - (u64) __pa(pcidev_info), - (u64) __pa(sn_irq_info)); - if (status) - BUG(); /* Cannot get platform pci device information */ - - /* Add pcidev_info to list in sn_pci_controller struct */ - list_add_tail(&pcidev_info->pdi_list, - &(SN_PCI_CONTROLLER(dev->bus)->pcidev_info)); + unsigned long addr, end, size, start; /* Copy over PIO Mapped Addresses */ for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) { - unsigned long start, end, addr; if (!pcidev_info->pdi_pio_mapped_addr[idx]) { pci_addrs[idx] = -1; @@ -419,60 +210,28 @@ void sn_pci_fixup_slot(struct pci_dev *dev) dev->resource[idx].parent = &ioport_resource; else dev->resource[idx].parent = &iomem_resource; + /* If ROM, mark as shadowed in PROM */ + if (idx == PCI_ROM_RESOURCE) + dev->resource[idx].flags |= IORESOURCE_ROM_BIOS_COPY; } /* Create a pci_window in the pci_controller struct for * each device resource. 
*/ if (count > 0) sn_pci_window_fixup(dev, count, pci_addrs); - - /* - * Using the PROMs values for the PCI host bus, get the Linux - * PCI host_pci_dev struct and set up host bus linkages - */ - - bus_no = (pcidev_info->pdi_slot_host_handle >> 32) & 0xff; - devfn = pcidev_info->pdi_slot_host_handle & 0xffffffff; - host_pci_bus = pci_find_bus(segment, bus_no); - host_pci_dev = pci_get_slot(host_pci_bus, devfn); - - pcidev_info->host_pci_dev = host_pci_dev; - pcidev_info->pdi_linux_pcidev = dev; - pcidev_info->pdi_host_pcidev_info = SN_PCIDEV_INFO(host_pci_dev); - bs = SN_PCIBUS_BUSSOFT(dev->bus); - pcidev_info->pdi_pcibus_info = bs; - - if (bs && bs->bs_asic_type < PCIIO_ASIC_MAX_TYPES) { - SN_PCIDEV_BUSPROVIDER(dev) = sn_pci_provider[bs->bs_asic_type]; - } else { - SN_PCIDEV_BUSPROVIDER(dev) = &sn_pci_default_provider; - } - - /* Only set up IRQ stuff if this device has a host bus context */ - if (bs && sn_irq_info->irq_irq) { - pcidev_info->pdi_sn_irq_info = sn_irq_info; - dev->irq = pcidev_info->pdi_sn_irq_info->irq_irq; - sn_irq_fixup(dev, sn_irq_info); - } else { - pcidev_info->pdi_sn_irq_info = NULL; - kfree(sn_irq_info); - } } /* * sn_pci_controller_fixup() - This routine sets up a bus's resources - * consistent with the Linux PCI abstraction layer. + * consistent with the Linux PCI abstraction layer. 
*/ -void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) +static void +sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) { - int status; - int nasid, cnode; + s64 status = 0; struct pci_controller *controller; - struct sn_pci_controller *sn_controller; struct pcibus_bussoft *prom_bussoft_ptr; - struct hubdev_info *hubdev_info; - void *provider_soft; - struct sn_pcibus_provider *provider; + status = sal_get_pcibus_info((u64) segment, (u64) busnum, (u64) ia64_tpa(&prom_bussoft_ptr)); @@ -480,261 +239,77 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) return; /*bus # does not exist */ prom_bussoft_ptr = __va(prom_bussoft_ptr); - /* Allocate a sn_pci_controller, which has a pci_controller struct - * as the first member. - */ - sn_controller = kzalloc(sizeof(struct sn_pci_controller), GFP_KERNEL); - if (!sn_controller) + controller = kzalloc(sizeof(*controller), GFP_KERNEL); + if (!controller) BUG(); - INIT_LIST_HEAD(&sn_controller->pcidev_info); - controller = &sn_controller->pci_controller; controller->segment = segment; - if (bus == NULL) { - bus = pci_scan_bus(busnum, &pci_root_ops, controller); - if (bus == NULL) - goto error_return; /* error, or bus already scanned */ - bus->sysdata = NULL; - } - - if (bus->sysdata) - goto error_return; /* sysdata already alloc'd */ - /* - * Per-provider fixup. Copies the contents from prom to local - * area and links SN_PCIBUS_BUSSOFT(). + * Temporarily save the prom_bussoft_ptr for use by sn_bus_fixup(). 
+ * (platform_data will be overwritten later in sn_common_bus_fixup()) */ + controller->platform_data = prom_bussoft_ptr; - if (prom_bussoft_ptr->bs_asic_type >= PCIIO_ASIC_MAX_TYPES) - goto error_return; /* unsupported asic type */ - - if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB) - goto error_return; /* no further fixup necessary */ - - provider = sn_pci_provider[prom_bussoft_ptr->bs_asic_type]; - if (provider == NULL) - goto error_return; /* no provider registerd for this asic */ + bus = pci_scan_bus(busnum, &pci_root_ops, controller); + if (bus == NULL) + goto error_return; /* error, or bus already scanned */ bus->sysdata = controller; - if (provider->bus_fixup) - provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr, controller); - else - provider_soft = NULL; - - if (provider_soft == NULL) { - /* fixup failed or not applicable */ - bus->sysdata = NULL; - goto error_return; - } - - /* - * Setup pci_windows for legacy IO and MEM space. - * (Temporary until ACPI support is in place.) 
- */ - controller->window = kcalloc(2, sizeof(struct pci_window), GFP_KERNEL); - if (controller->window == NULL) - BUG(); - controller->window[0].offset = prom_bussoft_ptr->bs_legacy_io; - controller->window[0].resource.name = "legacy_io"; - controller->window[0].resource.flags = IORESOURCE_IO; - controller->window[0].resource.start = prom_bussoft_ptr->bs_legacy_io; - controller->window[0].resource.end = - controller->window[0].resource.start + 0xffff; - controller->window[0].resource.parent = &ioport_resource; - controller->window[1].offset = prom_bussoft_ptr->bs_legacy_mem; - controller->window[1].resource.name = "legacy_mem"; - controller->window[1].resource.flags = IORESOURCE_MEM; - controller->window[1].resource.start = prom_bussoft_ptr->bs_legacy_mem; - controller->window[1].resource.end = - controller->window[1].resource.start + (1024 * 1024) - 1; - controller->window[1].resource.parent = &iomem_resource; - controller->windows = 2; - - /* - * Generic bus fixup goes here. Don't reference prom_bussoft_ptr - * after this point. - */ - - PCI_CONTROLLER(bus)->platform_data = provider_soft; - nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base); - cnode = nasid_to_cnodeid(nasid); - hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); - SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info = - &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]); - /* - * If the node information we obtained during the fixup phase is invalid - * then set controller->node to -1 (undetermined) - */ - if (controller->node >= num_online_nodes()) { - struct pcibus_bussoft *b = SN_PCIBUS_BUSSOFT(bus); - - printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%u" - "L_IO=%lx L_MEM=%lx BASE=%lx\n", - b->bs_asic_type, b->bs_xid, b->bs_persist_busnum, - b->bs_legacy_io, b->bs_legacy_mem, b->bs_base); - printk(KERN_WARNING "on node %d but only %d nodes online." 
- "Association set to undetermined.\n", - controller->node, num_online_nodes()); - controller->node = -1; - } return; error_return: - kfree(sn_controller); + kfree(controller); return; } -void sn_bus_store_sysdata(struct pci_dev *dev) +/* + * sn_bus_fixup + */ +void +sn_bus_fixup(struct pci_bus *bus) { - struct sysdata_el *element; - - element = kzalloc(sizeof(struct sysdata_el), GFP_KERNEL); - if (!element) { - dev_dbg(dev, "%s: out of memory!\n", __FUNCTION__); - return; - } - element->sysdata = SN_PCIDEV_INFO(dev); - list_add(&element->entry, &sn_sysdata_list); -} + struct pci_dev *pci_dev = NULL; + struct pcibus_bussoft *prom_bussoft_ptr; + extern void sn_common_bus_fixup(struct pci_bus *, + struct pcibus_bussoft *); + + + if (!bus->parent) { /* If root bus */ + prom_bussoft_ptr = PCI_CONTROLLER(bus)->platform_data; + if (prom_bussoft_ptr == NULL) { + printk(KERN_ERR + "sn_bus_fixup: 0x%04x:0x%02x Unable to " + "obtain prom_bussoft_ptr\n", + pci_domain_nr(bus), bus->number); + return; + } + sn_common_bus_fixup(bus, prom_bussoft_ptr); + sn_legacy_pci_window_fixup(PCI_CONTROLLER(bus), + prom_bussoft_ptr->bs_legacy_io, + prom_bussoft_ptr->bs_legacy_mem); + } + list_for_each_entry(pci_dev, &bus->devices, bus_list) { + sn_pci_fixup_slot(pci_dev); + } -void sn_bus_free_sysdata(void) -{ - struct sysdata_el *element; - struct list_head *list, *safe; - - list_for_each_safe(list, safe, &sn_sysdata_list) { - element = list_entry(list, struct sysdata_el, entry); - list_del(&element->entry); - list_del(&(((struct pcidev_info *) - (element->sysdata))->pdi_list)); - kfree(element->sysdata); - kfree(element); - } - return; } /* - * Ugly hack to get PCI setup until we have a proper ACPI namespace. + * sn_io_init - PROM does not have ACPI support to define nodes or root buses, + * so we need to do things the hard way, including initiating the + * bus scanning ourselves. 
*/ -#define PCI_BUSES_TO_SCAN 256 - -static int __init sn_pci_init(void) +void __init sn_io_init(void) { int i, j; - struct pci_dev *pci_dev = NULL; - - if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM()) - return 0; - - /* - * prime sn_pci_provider[]. Individial provider init routines will - * override their respective default entries. - */ - - for (i = 0; i < PCIIO_ASIC_MAX_TYPES; i++) - sn_pci_provider[i] = &sn_pci_default_provider; - pcibr_init_provider(); - tioca_init_provider(); - tioce_init_provider(); - - /* - * This is needed to avoid bounce limit checks in the blk layer - */ - ia64_max_iommu_merge_mask = ~PAGE_MASK; sn_fixup_ionodes(); - sn_irq_lh_init(); - INIT_LIST_HEAD(&sn_sysdata_list); - sn_init_cpei_timer(); - -#ifdef CONFIG_PROC_FS - register_sn_procfs(); -#endif /* busses are not known yet ... */ for (i = 0; i <= max_segment_number; i++) for (j = 0; j <= max_pcibus_number; j++) sn_pci_controller_fixup(i, j, NULL); - - /* - * Generic Linux PCI Layer has created the pci_bus and pci_dev - * structures - time for us to add our SN PLatform specific - * information. - */ - - while ((pci_dev = - pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pci_dev)) != NULL) - sn_pci_fixup_slot(pci_dev); - - sn_ioif_inited = 1; /* sn I/O infrastructure now initialized */ - - return 0; -} - -/* - * hubdev_init_node() - Creates the HUB data structure and link them to it's - * own NODE specific data area. 
- */ -void hubdev_init_node(nodepda_t * npda, cnodeid_t node) -{ - struct hubdev_info *hubdev_info; - int size; - pg_data_t *pg; - - size = sizeof(struct hubdev_info); - - if (node >= num_online_nodes()) /* Headless/memless IO nodes */ - pg = NODE_DATA(0); - else - pg = NODE_DATA(node); - - hubdev_info = (struct hubdev_info *)alloc_bootmem_node(pg, size); - - npda->pdinfo = (void *)hubdev_info; } - -geoid_t -cnodeid_get_geoid(cnodeid_t cnode) -{ - struct hubdev_info *hubdev; - - hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); - return hubdev->hdi_geoid; -} - -void sn_generate_path(struct pci_bus *pci_bus, char *address) -{ - nasid_t nasid; - cnodeid_t cnode; - geoid_t geoid; - moduleid_t moduleid; - u16 bricktype; - - nasid = NASID_GET(SN_PCIBUS_BUSSOFT(pci_bus)->bs_base); - cnode = nasid_to_cnodeid(nasid); - geoid = cnodeid_get_geoid(cnode); - moduleid = geo_module(geoid); - - sprintf(address, "module_%c%c%c%c%.2d", - '0'+RACK_GET_CLASS(MODULE_GET_RACK(moduleid)), - '0'+RACK_GET_GROUP(MODULE_GET_RACK(moduleid)), - '0'+RACK_GET_NUM(MODULE_GET_RACK(moduleid)), - MODULE_GET_BTCHAR(moduleid), MODULE_GET_BPOS(moduleid)); - - /* Tollhouse requires slot id to be displayed */ - bricktype = MODULE_GET_BTYPE(moduleid); - if ((bricktype == L1_BRICKTYPE_191010) || - (bricktype == L1_BRICKTYPE_1932)) - sprintf(address, "%s^%d", address, geo_slot(geoid)); -} - -subsys_initcall(sn_pci_init); -EXPORT_SYMBOL(sn_pci_fixup_slot); -EXPORT_SYMBOL(sn_pci_unfixup_slot); -EXPORT_SYMBOL(sn_pci_controller_fixup); -EXPORT_SYMBOL(sn_bus_store_sysdata); -EXPORT_SYMBOL(sn_bus_free_sysdata); -EXPORT_SYMBOL(sn_generate_path); diff --git a/arch/ia64/sn/kernel/iomv.c b/arch/ia64/sn/kernel/iomv.c index 7ce3cdad627b..4aa4f301d56d 100644 --- a/arch/ia64/sn/kernel/iomv.c +++ b/arch/ia64/sn/kernel/iomv.c @@ -3,10 +3,11 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2000-2003 Silicon Graphics, Inc. All rights reserved. 
+ * Copyright (C) 2000-2003, 2006 Silicon Graphics, Inc. All rights reserved. */ #include <linux/module.h> +#include <linux/acpi.h> #include <asm/io.h> #include <asm/delay.h> #include <asm/vga.h> @@ -15,6 +16,7 @@ #include <asm/sn/pda.h> #include <asm/sn/sn_cpuid.h> #include <asm/sn/shub_mmr.h> +#include <asm/sn/acpi.h> #define IS_LEGACY_VGA_IOPORT(p) \ (((p) >= 0x3b0 && (p) <= 0x3bb) || ((p) >= 0x3c0 && (p) <= 0x3df)) @@ -31,11 +33,14 @@ void *sn_io_addr(unsigned long port) { if (!IS_RUNNING_ON_SIMULATOR()) { if (IS_LEGACY_VGA_IOPORT(port)) - port += vga_console_iobase; + return (__ia64_mk_io_addr(port)); /* On sn2, legacy I/O ports don't point at anything */ if (port < (64 * 1024)) return NULL; - return ((void *)(port | __IA64_UNCACHED_OFFSET)); + if (SN_ACPI_BASE_SUPPORT()) + return (__ia64_mk_io_addr(port)); + else + return ((void *)(port | __IA64_UNCACHED_OFFSET)); } else { /* but the simulator uses them... */ unsigned long addr; diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 7bb6ad188ba3..8c5bee01eaa2 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -117,7 +117,10 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, nasid_t nasid, int slice) { int vector; + int cpuid; +#ifdef CONFIG_SMP int cpuphys; +#endif int64_t bridge; int local_widget, status; nasid_t local_nasid; @@ -146,7 +149,6 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, vector = sn_irq_info->irq_irq; /* Free the old PROM new_irq_info structure */ sn_intr_free(local_nasid, local_widget, new_irq_info); - /* Update kernels new_irq_info with new target info */ unregister_intr_pda(new_irq_info); /* allocate a new PROM new_irq_info struct */ @@ -160,8 +162,10 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, return NULL; } - cpuphys = nasid_slice_to_cpuid(nasid, slice); - new_irq_info->irq_cpuid = cpuphys; + /* Update kernels new_irq_info with new target info */ + cpuid = 
nasid_slice_to_cpuid(new_irq_info->irq_nasid, + new_irq_info->irq_slice); + new_irq_info->irq_cpuid = cpuid; register_intr_pda(new_irq_info); pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type]; @@ -180,6 +184,7 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, call_rcu(&sn_irq_info->rcu, sn_irq_info_free); #ifdef CONFIG_SMP + cpuphys = cpu_physical_id(cpuid); set_irq_affinity_info((vector & 0xff), cpuphys, 0); #endif @@ -201,7 +206,7 @@ static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) } struct hw_interrupt_type irq_type_sn = { - .typename = "SN hub", + .name = "SN hub", .startup = sn_startup_irq, .shutdown = sn_shutdown_irq, .enable = sn_enable_irq, @@ -299,6 +304,9 @@ void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info) nasid_t nasid = sn_irq_info->irq_nasid; int slice = sn_irq_info->irq_slice; int cpu = nasid_slice_to_cpuid(nasid, slice); +#ifdef CONFIG_SMP + int cpuphys; +#endif pci_dev_get(pci_dev); sn_irq_info->irq_cpuid = cpu; @@ -311,6 +319,10 @@ void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info) spin_unlock(&sn_irq_info_lock); register_intr_pda(sn_irq_info); +#ifdef CONFIG_SMP + cpuphys = cpu_physical_id(cpu); + set_irq_affinity_info(sn_irq_info->irq_irq, cpuphys, 0); +#endif } void sn_irq_unfixup(struct pci_dev *pci_dev) diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c index 6ffd1f850d41..b3a435fd70fb 100644 --- a/arch/ia64/sn/kernel/msi_sn.c +++ b/arch/ia64/sn/kernel/msi_sn.c @@ -136,10 +136,6 @@ int sn_setup_msi_irq(unsigned int irq, struct pci_dev *pdev) */ msg.data = 0x100 + irq; -#ifdef CONFIG_SMP - set_irq_affinity_info(irq, sn_irq_info->irq_cpuid, 0); -#endif - write_msi_msg(irq, &msg); set_irq_chip_and_handler(irq, &sn_msi_chip, handle_edge_irq); diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 7a2d824c5ce3..8571e52c2efd 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ 
-388,6 +388,14 @@ void __init sn_setup(char **cmdline_p) ia64_sn_plat_set_error_handling_features(); // obsolete ia64_sn_set_os_feature(OSF_MCA_SLV_TO_OS_INIT_SLV); ia64_sn_set_os_feature(OSF_FEAT_LOG_SBES); + /* + * Note: The calls to notify the PROM of ACPI and PCI Segment + * support must be done prior to acpi_load_tables(), as + * an ACPI capable PROM will rebuild the DSDT as result + * of the call. + */ + ia64_sn_set_os_feature(OSF_PCISEGMENT_ENABLE); + ia64_sn_set_os_feature(OSF_ACPI_ENABLE); #if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE) @@ -413,6 +421,16 @@ void __init sn_setup(char **cmdline_p) if (! vga_console_membase) sn_scan_pcdp(); + /* + * Setup legacy IO space. + * vga_console_iobase maps to PCI IO Space address 0 on the + * bus containing the VGA console. + */ + if (vga_console_iobase) { + io_space[0].mmio_base = vga_console_iobase; + io_space[0].sparse = 0; + } + if (vga_console_membase) { /* usable vga ... make tty0 the preferred default console */ if (!strstr(*cmdline_p, "console=")) @@ -562,7 +580,7 @@ void __cpuinit sn_cpu_init(void) int slice; int cnode; int i; - static int wars_have_been_checked; + static int wars_have_been_checked, set_cpu0_number; cpuid = smp_processor_id(); if (cpuid == 0 && IS_MEDUSA()) { @@ -587,8 +605,16 @@ void __cpuinit sn_cpu_init(void) /* * Don't check status. The SAL call is not supported on all PROMs * but a failure is harmless. + * Architechtuallly, cpu_init is always called twice on cpu 0. We + * should set cpu_number on cpu 0 once. 
*/ - (void) ia64_sn_set_cpu_number(cpuid); + if (cpuid == 0) { + if (!set_cpu0_number) { + (void) ia64_sn_set_cpu_number(cpuid); + set_cpu0_number = 1; + } + } else + (void) ia64_sn_set_cpu_number(cpuid); /* * The boot cpu makes this call again after platform initialization is @@ -751,5 +777,13 @@ int sn_prom_feature_available(int id) return 0; return test_bit(id, sn_prom_features); } + +void +sn_kernel_launch_event(void) +{ + /* ignore status until we understand possible failure, if any*/ + if (ia64_sn_kernel_launch_event()) + printk(KERN_ERR "KEXEC is not supported in this PROM, Please update the PROM.\n"); +} EXPORT_SYMBOL(sn_prom_feature_available); diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 462ea178f49a..33367996d72d 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -189,7 +189,7 @@ static void print_pci_topology(struct seq_file *s) int e; for (sz = PAGE_SIZE; sz < 16 * PAGE_SIZE; sz += PAGE_SIZE) { - if (!(p = (char *)kmalloc(sz, GFP_KERNEL))) + if (!(p = kmalloc(sz, GFP_KERNEL))) break; e = ia64_sn_ioif_get_pci_topology(__pa(p), sz); if (e == SALRET_OK) diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c index feaf1a6e8101..493380b2c05f 100644 --- a/arch/ia64/sn/kernel/tiocx.c +++ b/arch/ia64/sn/kernel/tiocx.c @@ -552,7 +552,7 @@ static void __exit tiocx_exit(void) bus_unregister(&tiocx_bus_type); } -subsys_initcall(tiocx_init); +fs_initcall(tiocx_init); module_exit(tiocx_exit); /************************************************************************ diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c index 1f3540826e68..c08db9c2375d 100644 --- a/arch/ia64/sn/kernel/xpc_channel.c +++ b/arch/ia64/sn/kernel/xpc_channel.c @@ -632,7 +632,7 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags) ch->number, ch->partid); spin_unlock_irqrestore(&ch->lock, *irq_flags); - xpc_create_kthreads(ch, 1); + 
xpc_create_kthreads(ch, 1, 0); spin_lock_irqsave(&ch->lock, *irq_flags); } @@ -754,12 +754,12 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) /* make sure all activity has settled down first */ - if (atomic_read(&ch->references) > 0 || - ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && - !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE))) { + if (atomic_read(&ch->kthreads_assigned) > 0 || + atomic_read(&ch->references) > 0) { return; } - DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0); + DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE)); if (part->act_state == XPC_P_DEACTIVATING) { /* can't proceed until the other side disengages from us */ @@ -1651,6 +1651,11 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch, /* wake all idle kthreads so they can exit */ if (atomic_read(&ch->kthreads_idle) > 0) { wake_up_all(&ch->idle_wq); + + } else if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { + /* start a kthread that will do the xpcDisconnecting callout */ + xpc_create_kthreads(ch, 1, 1); } /* wake those waiting to allocate an entry from the local msg queue */ diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c index fa96dfc0e1aa..7a387d237363 100644 --- a/arch/ia64/sn/kernel/xpc_main.c +++ b/arch/ia64/sn/kernel/xpc_main.c @@ -681,7 +681,7 @@ xpc_activate_kthreads(struct xpc_channel *ch, int needed) dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n", needed, ch->partid, ch->number); - xpc_create_kthreads(ch, needed); + xpc_create_kthreads(ch, needed, 0); } @@ -775,26 +775,28 @@ xpc_daemonize_kthread(void *args) xpc_kthread_waitmsgs(part, ch); } - if (atomic_dec_return(&ch->kthreads_assigned) == 0) { - spin_lock_irqsave(&ch->lock, irq_flags); - if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && - !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { - ch->flags |= XPC_C_DISCONNECTINGCALLOUT; - 
spin_unlock_irqrestore(&ch->lock, irq_flags); + /* let registerer know that connection is disconnecting */ - xpc_disconnect_callout(ch, xpcDisconnecting); - - spin_lock_irqsave(&ch->lock, irq_flags); - ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE; - } + spin_lock_irqsave(&ch->lock, irq_flags); + if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { + ch->flags |= XPC_C_DISCONNECTINGCALLOUT; spin_unlock_irqrestore(&ch->lock, irq_flags); + + xpc_disconnect_callout(ch, xpcDisconnecting); + + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE; + } + spin_unlock_irqrestore(&ch->lock, irq_flags); + + if (atomic_dec_return(&ch->kthreads_assigned) == 0) { if (atomic_dec_return(&part->nchannels_engaged) == 0) { xpc_mark_partition_disengaged(part); xpc_IPI_send_disengage(part); } } - xpc_msgqueue_deref(ch); dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n", @@ -818,7 +820,8 @@ xpc_daemonize_kthread(void *args) * partition. */ void -xpc_create_kthreads(struct xpc_channel *ch, int needed) +xpc_create_kthreads(struct xpc_channel *ch, int needed, + int ignore_disconnecting) { unsigned long irq_flags; pid_t pid; @@ -833,16 +836,38 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed) * kthread. That kthread is responsible for doing the * counterpart to the following before it exits. 
*/ + if (ignore_disconnecting) { + if (!atomic_inc_not_zero(&ch->kthreads_assigned)) { + /* kthreads assigned had gone to zero */ + BUG_ON(!(ch->flags & + XPC_C_DISCONNECTINGCALLOUT_MADE)); + break; + } + + } else if (ch->flags & XPC_C_DISCONNECTING) { + break; + + } else if (atomic_inc_return(&ch->kthreads_assigned) == 1) { + if (atomic_inc_return(&part->nchannels_engaged) == 1) + xpc_mark_partition_engaged(part); + } (void) xpc_part_ref(part); xpc_msgqueue_ref(ch); - if (atomic_inc_return(&ch->kthreads_assigned) == 1 && - atomic_inc_return(&part->nchannels_engaged) == 1) { - xpc_mark_partition_engaged(part); - } pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0); if (pid < 0) { /* the fork failed */ + + /* + * NOTE: if (ignore_disconnecting && + * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true, + * then we'll deadlock if all other kthreads assigned + * to this channel are blocked in the channel's + * registerer, because the only thing that will unblock + * them is the xpcDisconnecting callout that this + * failed kernel_thread would have made. + */ + if (atomic_dec_return(&ch->kthreads_assigned) == 0 && atomic_dec_return(&part->nchannels_engaged) == 0) { xpc_mark_partition_disengaged(part); @@ -857,9 +882,6 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed) * Flag this as an error only if we have an * insufficient #of kthreads for the channel * to function. - * - * No xpc_msgqueue_ref() is needed here since - * the channel mgr is doing this. */ spin_lock_irqsave(&ch->lock, irq_flags); XPC_DISCONNECT_CHANNEL(ch, xpcLackOfResources, diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index 27dd7df0f446..6846dc9b432d 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved. 
+ * Copyright (C) 2001-2004, 2006 Silicon Graphics, Inc. All rights reserved. */ #include <linux/interrupt.h> @@ -109,7 +109,6 @@ void * pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller) { int nasid, cnode, j; - cnodeid_t near_cnode; struct hubdev_info *hubdev_info; struct pcibus_info *soft; struct sn_flush_device_kernel *sn_flush_device_kernel; @@ -186,20 +185,6 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont return NULL; } - if (prom_bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCP) { - /* TIO PCI Bridge: find nearest node with CPUs */ - int e = sn_hwperf_get_nearest_node(cnode, NULL, &near_cnode); - - if (e < 0) { - near_cnode = (cnodeid_t)-1; /* use any node */ - printk(KERN_WARNING "pcibr_bus_fixup: failed to find " - "near node with CPUs to TIO node %d, err=%d\n", - cnode, e); - } - controller->node = near_cnode; - } - else - controller->node = cnode; return soft; } diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c index 46e16dcf5971..35f854fb6120 100644 --- a/arch/ia64/sn/pci/tioce_provider.c +++ b/arch/ia64/sn/pci/tioce_provider.c @@ -15,7 +15,6 @@ #include <asm/sn/pcidev.h> #include <asm/sn/pcibus_provider_defs.h> #include <asm/sn/tioce_provider.h> -#include <asm/sn/sn2/sn_hwperf.h> /* * 1/26/2006 @@ -990,8 +989,6 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info) static void * tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller) { - int my_nasid; - cnodeid_t my_cnode, mem_cnode; struct tioce_common *tioce_common; struct tioce_kernel *tioce_kern; struct tioce __iomem *tioce_mmr; @@ -1035,21 +1032,6 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont tioce_common->ce_pcibus.bs_persist_segment, tioce_common->ce_pcibus.bs_persist_busnum); - /* - * identify closest nasid for memory allocations - */ - - my_nasid = NASID_GET(tioce_common->ce_pcibus.bs_base); - my_cnode = 
nasid_to_cnodeid(my_nasid); - - if (sn_hwperf_get_nearest_node(my_cnode, &mem_cnode, NULL) < 0) { - printk(KERN_WARNING "tioce_bus_fixup: failed to find " - "closest node with MEM to TIO node %d\n", my_cnode); - mem_cnode = (cnodeid_t)-1; /* use any node */ - } - - controller->node = mem_cnode; - return tioce_common; } |