diff options
author | David S. Miller <davem@sunset.davemloft.net> | 2007-02-10 17:41:02 -0800 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-02-10 23:50:37 -0800 |
commit | 35a17eb6a87c9ceb0d35dcb51f464fe6faf584ab (patch) | |
tree | 7f56095a56e9f62dca7514cdfe781739548011f5 /arch/sparc64/kernel/pci_sun4v.c | |
parent | 68c921869491c119142612fa5796c9f8b4e9970b (diff) | |
download | lwn-35a17eb6a87c9ceb0d35dcb51f464fe6faf584ab.tar.gz lwn-35a17eb6a87c9ceb0d35dcb51f464fe6faf584ab.zip |
[SPARC64]: Add PCI MSI support on Niagara.
This is kind of hokey, we could use the hardware provided facilities
much better.
MSIs are assosciated with MSI Queues. MSI Queues generate interrupts
when any MSI assosciated with it is signalled. This suggests a
two-tiered IRQ dispatch scheme:
MSI Queue interrupt --> queue interrupt handler
MSI dispatch --> driver interrupt handler
But we just get one-level under Linux currently. What I'd like to do
is possibly stick the IRQ actions into a per-MSI-Queue data structure,
and dispatch them form there, but the generic IRQ layer doesn't
provide a way to do that right now.
So, the current kludge is to "ACK" the interrupt by processing the
MSI Queue data structures and ACK'ing them, then we run the actual
handler like normal.
We are wasting a lot of useful information, for example the MSI data
and address are provided with ever MSI, as well as a system tick if
available. If we could pass this into the IRQ handler it could help
with certain things, in particular for PCI-Express error messages.
The MSI entries on sparc64 also tell you exactly which bus/device/fn
sent the MSI, which would be great for error handling when no
registered IRQ handler can service the interrupt.
We override the disable/enable IRQ chip methods in sun4v_msi, so we
have to call {mask,unmask}_msi_irq() directly from there. This is
another ugly wart.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc64/kernel/pci_sun4v.c')
-rw-r--r-- | arch/sparc64/kernel/pci_sun4v.c | 444 |
1 files changed, 444 insertions, 0 deletions
diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c index 6b04794b7a97..ec22cd61ec8c 100644 --- a/arch/sparc64/kernel/pci_sun4v.c +++ b/arch/sparc64/kernel/pci_sun4v.c @@ -10,6 +10,8 @@ #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/percpu.h> +#include <linux/irq.h> +#include <linux/msi.h> #include <asm/pbm.h> #include <asm/iommu.h> @@ -1074,6 +1076,443 @@ static void pci_sun4v_get_bus_range(struct pci_pbm_info *pbm) } +#ifdef CONFIG_PCI_MSI +struct pci_sun4v_msiq_entry { + u64 version_type; +#define MSIQ_VERSION_MASK 0xffffffff00000000UL +#define MSIQ_VERSION_SHIFT 32 +#define MSIQ_TYPE_MASK 0x00000000000000ffUL +#define MSIQ_TYPE_SHIFT 0 +#define MSIQ_TYPE_NONE 0x00 +#define MSIQ_TYPE_MSG 0x01 +#define MSIQ_TYPE_MSI32 0x02 +#define MSIQ_TYPE_MSI64 0x03 +#define MSIQ_TYPE_INTX 0x08 +#define MSIQ_TYPE_NONE2 0xff + + u64 intx_sysino; + u64 reserved1; + u64 stick; + u64 req_id; /* bus/device/func */ +#define MSIQ_REQID_BUS_MASK 0xff00UL +#define MSIQ_REQID_BUS_SHIFT 8 +#define MSIQ_REQID_DEVICE_MASK 0x00f8UL +#define MSIQ_REQID_DEVICE_SHIFT 3 +#define MSIQ_REQID_FUNC_MASK 0x0007UL +#define MSIQ_REQID_FUNC_SHIFT 0 + + u64 msi_address; + + /* The format of this value is message type dependant. + * For MSI bits 15:0 are the data from the MSI packet. + * For MSI-X bits 31:0 are the data from the MSI packet. + * For MSG, the message code and message routing code where: + * bits 39:32 is the bus/device/fn of the msg target-id + * bits 18:16 is the message routing code + * bits 7:0 is the message code + * For INTx the low order 2-bits are: + * 00 - INTA + * 01 - INTB + * 10 - INTC + * 11 - INTD + */ + u64 msi_data; + + u64 reserved2; +}; + +/* For now this just runs as a pre-handler for the real interrupt handler. + * So we just walk through the queue and ACK all the entries, update the + * head pointer, and return. + * + * In the longer term it would be nice to do something more integrated + * wherein we can pass in some of this MSI info to the drivers. This + * would be most useful for PCIe fabric error messages, although we could + * invoke those directly from the loop here in order to pass the info around. + */ +static void pci_sun4v_msi_prehandler(unsigned int ino, void *data1, void *data2) +{ + struct pci_pbm_info *pbm = data1; + struct pci_sun4v_msiq_entry *base, *ep; + unsigned long msiqid, orig_head, head, type, err; + + msiqid = (unsigned long) data2; + + head = 0xdeadbeef; + err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, &head); + if (unlikely(err)) + goto hv_error_get; + + if (unlikely(head >= (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry)))) + goto bad_offset; + + head /= sizeof(struct pci_sun4v_msiq_entry); + orig_head = head; + base = (pbm->msi_queues + ((msiqid - pbm->msiq_first) * + (pbm->msiq_ent_count * + sizeof(struct pci_sun4v_msiq_entry)))); + ep = &base[head]; + while ((ep->version_type & MSIQ_TYPE_MASK) != 0) { + type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT; + if (unlikely(type != MSIQ_TYPE_MSI32 && + type != MSIQ_TYPE_MSI64)) + goto bad_type; + + pci_sun4v_msi_setstate(pbm->devhandle, + ep->msi_data /* msi_num */, + HV_MSISTATE_IDLE); + + /* Clear the entry. */ + ep->version_type &= ~MSIQ_TYPE_MASK; + + /* Go to next entry in ring. */ + head++; + if (head >= pbm->msiq_ent_count) + head = 0; + ep = &base[head]; + } + + if (likely(head != orig_head)) { + /* ACK entries by updating head pointer. */ + head *= sizeof(struct pci_sun4v_msiq_entry); + err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head); + if (unlikely(err)) + goto hv_error_set; + } + return; + +hv_error_set: + printk(KERN_EMERG "MSI: Hypervisor set head gives error %lu\n", err); + goto hv_error_cont; + +hv_error_get: + printk(KERN_EMERG "MSI: Hypervisor get head gives error %lu\n", err); + +hv_error_cont: + printk(KERN_EMERG "MSI: devhandle[%x] msiqid[%lx] head[%lu]\n", + pbm->devhandle, msiqid, head); + return; + +bad_offset: + printk(KERN_EMERG "MSI: Hypervisor gives bad offset %lx max(%lx)\n", + head, pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry)); + return; + +bad_type: + printk(KERN_EMERG "MSI: Entry has bad type %lx\n", type); + return; +} + +static int msi_bitmap_alloc(struct pci_pbm_info *pbm) +{ + unsigned long size, bits_per_ulong; + + bits_per_ulong = sizeof(unsigned long) * 8; + size = (pbm->msi_num + (bits_per_ulong - 1)) & ~(bits_per_ulong - 1); + size /= 8; + BUG_ON(size % sizeof(unsigned long)); + + pbm->msi_bitmap = kzalloc(size, GFP_KERNEL); + if (!pbm->msi_bitmap) + return -ENOMEM; + + return 0; +} + +static void msi_bitmap_free(struct pci_pbm_info *pbm) +{ + kfree(pbm->msi_bitmap); + pbm->msi_bitmap = NULL; +} + +static int msi_queue_alloc(struct pci_pbm_info *pbm) +{ + unsigned long q_size, alloc_size, pages, order; + int i; + + q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry); + alloc_size = (pbm->msiq_num * q_size); + order = get_order(alloc_size); + pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order); + if (pages == 0UL) { + printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n", + order); + return -ENOMEM; + } + memset((char *)pages, 0, PAGE_SIZE << order); + pbm->msi_queues = (void *) pages; + + for (i = 0; i < pbm->msiq_num; i++) { + unsigned long err, base = __pa(pages + (i * q_size)); + unsigned long ret1, ret2; + + err = pci_sun4v_msiq_conf(pbm->devhandle, + pbm->msiq_first + i, + base, pbm->msiq_ent_count); + if (err) { + printk(KERN_ERR "MSI: msiq register fails (err=%lu)\n", + err); + goto h_error; + } + + err = pci_sun4v_msiq_info(pbm->devhandle, + pbm->msiq_first + i, + &ret1, &ret2); + if (err) { + printk(KERN_ERR "MSI: Cannot read msiq (err=%lu)\n", + err); + goto h_error; + } + if (ret1 != base || ret2 != pbm->msiq_ent_count) { + printk(KERN_ERR "MSI: Bogus qconf " + "expected[%lx:%x] got[%lx:%lx]\n", + base, pbm->msiq_ent_count, + ret1, ret2); + goto h_error; + } + } + + return 0; + +h_error: + free_pages(pages, order); + return -EINVAL; +} + +static void pci_sun4v_msi_init(struct pci_pbm_info *pbm) +{ + u32 *val; + int len; + + val = of_get_property(pbm->prom_node, "#msi-eqs", &len); + if (!val || len != 4) + goto no_msi; + pbm->msiq_num = *val; + if (pbm->msiq_num) { + struct msiq_prop { + u32 first_msiq; + u32 num_msiq; + u32 first_devino; + } *mqp; + struct msi_range_prop { + u32 first_msi; + u32 num_msi; + } *mrng; + struct addr_range_prop { + u32 msi32_high; + u32 msi32_low; + u32 msi32_len; + u32 msi64_high; + u32 msi64_low; + u32 msi64_len; + } *arng; + + val = of_get_property(pbm->prom_node, "msi-eq-size", &len); + if (!val || len != 4) + goto no_msi; + + pbm->msiq_ent_count = *val; + + mqp = of_get_property(pbm->prom_node, + "msi-eq-to-devino", &len); + if (!mqp || len != sizeof(struct msiq_prop)) + goto no_msi; + + pbm->msiq_first = mqp->first_msiq; + pbm->msiq_first_devino = mqp->first_devino; + + val = of_get_property(pbm->prom_node, "#msi", &len); + if (!val || len != 4) + goto no_msi; + pbm->msi_num = *val; + + mrng = of_get_property(pbm->prom_node, "msi-ranges", &len); + if (!mrng || len != sizeof(struct msi_range_prop)) + goto no_msi; + pbm->msi_first = mrng->first_msi; + + val = of_get_property(pbm->prom_node, "msi-data-mask", &len); + if (!val || len != 4) + goto no_msi; + pbm->msi_data_mask = *val; + + val = of_get_property(pbm->prom_node, "msix-data-width", &len); + if (!val || len != 4) + goto no_msi; + pbm->msix_data_width = *val; + + arng = of_get_property(pbm->prom_node, "msi-address-ranges", + &len); + if (!arng || len != sizeof(struct addr_range_prop)) + goto no_msi; + pbm->msi32_start = ((u64)arng->msi32_high << 32) | + (u64) arng->msi32_low; + pbm->msi64_start = ((u64)arng->msi64_high << 32) | + (u64) arng->msi64_low; + pbm->msi32_len = arng->msi32_len; + pbm->msi64_len = arng->msi64_len; + + if (msi_bitmap_alloc(pbm)) + goto no_msi; + + if (msi_queue_alloc(pbm)) { + msi_bitmap_free(pbm); + goto no_msi; + } + + printk(KERN_INFO "%s: MSI Queue first[%u] num[%u] count[%u] " + "devino[0x%x]\n", + pbm->name, + pbm->msiq_first, pbm->msiq_num, + pbm->msiq_ent_count, + pbm->msiq_first_devino); + printk(KERN_INFO "%s: MSI first[%u] num[%u] mask[0x%x] " + "width[%u]\n", + pbm->name, + pbm->msi_first, pbm->msi_num, pbm->msi_data_mask, + pbm->msix_data_width); + printk(KERN_INFO "%s: MSI addr32[0x%lx:0x%x] " + "addr64[0x%lx:0x%x]\n", + pbm->name, + pbm->msi32_start, pbm->msi32_len, + pbm->msi64_start, pbm->msi64_len); + printk(KERN_INFO "%s: MSI queues at RA [%p]\n", + pbm->name, + pbm->msi_queues); + } + + return; + +no_msi: + pbm->msiq_num = 0; + printk(KERN_INFO "%s: No MSI support.\n", pbm->name); +} + +static int alloc_msi(struct pci_pbm_info *pbm) +{ + int i; + + for (i = 0; i < pbm->msi_num; i++) { + if (!test_and_set_bit(i, pbm->msi_bitmap)) + return i + pbm->msi_first; + } + + return -ENOENT; +} + +static void free_msi(struct pci_pbm_info *pbm, int msi_num) +{ + msi_num -= pbm->msi_first; + clear_bit(msi_num, pbm->msi_bitmap); +} + +static int pci_sun4v_setup_msi_irq(unsigned int *virt_irq_p, + struct pci_dev *pdev, + struct msi_desc *entry) +{ + struct pcidev_cookie *pcp = pdev->sysdata; + struct pci_pbm_info *pbm = pcp->pbm; + unsigned long devino, msiqid; + struct msi_msg msg; + int msi_num, err; + + *virt_irq_p = 0; + + msi_num = alloc_msi(pbm); + if (msi_num < 0) + return msi_num; + + devino = sun4v_build_msi(pbm->devhandle, virt_irq_p, + pbm->msiq_first_devino, + (pbm->msiq_first_devino + + pbm->msiq_num)); + err = -ENOMEM; + if (!devino) + goto out_err; + + set_irq_msi(*virt_irq_p, entry); + + msiqid = ((devino - pbm->msiq_first_devino) + + pbm->msiq_first); + + err = -EINVAL; + if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE)) + if (err) + goto out_err; + + if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID)) + goto out_err; + + if (pci_sun4v_msi_setmsiq(pbm->devhandle, + msi_num, msiqid, + (entry->msi_attrib.is_64 ? + HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32))) + goto out_err; + + if (pci_sun4v_msi_setstate(pbm->devhandle, msi_num, HV_MSISTATE_IDLE)) + goto out_err; + + if (pci_sun4v_msi_setvalid(pbm->devhandle, msi_num, HV_MSIVALID_VALID)) + goto out_err; + + pcp->msi_num = msi_num; + + if (entry->msi_attrib.is_64) { + msg.address_hi = pbm->msi64_start >> 32; + msg.address_lo = pbm->msi64_start & 0xffffffff; + } else { + msg.address_hi = 0; + msg.address_lo = pbm->msi32_start; + } + msg.data = msi_num; + write_msi_msg(*virt_irq_p, &msg); + + irq_install_pre_handler(*virt_irq_p, + pci_sun4v_msi_prehandler, + pbm, (void *) msiqid); + + return 0; + +out_err: + free_msi(pbm, msi_num); + sun4v_destroy_msi(*virt_irq_p); + *virt_irq_p = 0; + return err; + +} + +static void pci_sun4v_teardown_msi_irq(unsigned int virt_irq, + struct pci_dev *pdev) +{ + struct pcidev_cookie *pcp = pdev->sysdata; + struct pci_pbm_info *pbm = pcp->pbm; + unsigned long msiqid, err; + unsigned int msi_num; + + msi_num = pcp->msi_num; + err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi_num, &msiqid); + if (err) { + printk(KERN_ERR "%s: getmsiq gives error %lu\n", + pbm->name, err); + return; + } + + pci_sun4v_msi_setvalid(pbm->devhandle, msi_num, HV_MSIVALID_INVALID); + pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_INVALID); + + free_msi(pbm, msi_num); + + /* The sun4v_destroy_msi() will liberate the devino and thus the MSIQ + * allocation. + */ + sun4v_destroy_msi(virt_irq); +} +#else /* CONFIG_PCI_MSI */ +static void pci_sun4v_msi_init(struct pci_pbm_info *pbm) +{ +} +#endif /* !(CONFIG_PCI_MSI) */ + static void pci_sun4v_pbm_init(struct pci_controller_info *p, struct device_node *dp, u32 devhandle) { struct pci_pbm_info *pbm; @@ -1119,6 +1558,7 @@ static void pci_sun4v_pbm_init(struct pci_controller_info *p, struct device_node pci_sun4v_get_bus_range(pbm); pci_sun4v_iommu_init(pbm); + pci_sun4v_msi_init(pbm); pdev_htab_populate(pbm); } @@ -1187,6 +1627,10 @@ void sun4v_pci_init(struct device_node *dp, char *model_name) p->scan_bus = pci_sun4v_scan_bus; p->base_address_update = pci_sun4v_base_address_update; p->resource_adjust = pci_sun4v_resource_adjust; +#ifdef CONFIG_PCI_MSI + p->setup_msi_irq = pci_sun4v_setup_msi_irq; + p->teardown_msi_irq = pci_sun4v_teardown_msi_irq; +#endif p->pci_ops = &pci_sun4v_ops; /* Like PSYCHO and SCHIZO we have a 2GB aligned area |