summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-12-16 10:11:38 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2009-12-16 10:11:38 -0800
commita79960e576ebca9dbf24489b562689f2be7e9ff0 (patch)
treeb0748839230c2bba1d49ccdd732608d7d1f334cb
parent661e338f728d101b4839b6b157d44cfcb80e3c5e (diff)
parentcd7bcf32d42b15891620b3f1387a00178b54291a (diff)
downloadlwn-a79960e576ebca9dbf24489b562689f2be7e9ff0.tar.gz
lwn-a79960e576ebca9dbf24489b562689f2be7e9ff0.zip
Merge git://git.infradead.org/iommu-2.6
* git://git.infradead.org/iommu-2.6: implement early_io{re,un}map for ia64 Revert "Intel IOMMU: Avoid memory allocation failures in dma map api calls" intel-iommu: ignore page table validation in pass through mode intel-iommu: Fix oops with intel_iommu=igfx_off intel-iommu: Check for an RMRR which ends before it starts. intel-iommu: Apply BIOS sanity checks for interrupt remapping too. intel-iommu: Detect DMAR in hyperspace at probe time. dmar: Fix build failure without NUMA, warn on bogus RHSA tables and don't abort iommu: Allocate dma-remapping structures using numa locality info intr_remap: Allocate intr-remapping table using numa locality info dmar: Allocate queued invalidation structure using numa locality info dmar: support for parsing Remapping Hardware Static Affinity structure
-rw-r--r--arch/ia64/include/asm/io.h2
-rw-r--r--arch/ia64/mm/ioremap.c11
-rw-r--r--drivers/pci/dmar.c110
-rw-r--r--drivers/pci/intel-iommu.c78
-rw-r--r--drivers/pci/intr_remapping.c3
-rw-r--r--include/linux/intel-iommu.h1
6 files changed, 154 insertions, 51 deletions
diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h
index 0d9d16e2d949..cc8335eb3110 100644
--- a/arch/ia64/include/asm/io.h
+++ b/arch/ia64/include/asm/io.h
@@ -424,6 +424,8 @@ __writeq (unsigned long val, volatile void __iomem *addr)
extern void __iomem * ioremap(unsigned long offset, unsigned long size);
extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size);
extern void iounmap (volatile void __iomem *addr);
+extern void __iomem * early_ioremap (unsigned long phys_addr, unsigned long size);
+extern void early_iounmap (volatile void __iomem *addr, unsigned long size);
/*
* String version of IO memory access ops:
diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c
index 2a140627dfd6..3dccdd8eb275 100644
--- a/arch/ia64/mm/ioremap.c
+++ b/arch/ia64/mm/ioremap.c
@@ -22,6 +22,12 @@ __ioremap (unsigned long phys_addr)
}
void __iomem *
+early_ioremap (unsigned long phys_addr, unsigned long size)
+{
+ return __ioremap(phys_addr);
+}
+
+void __iomem *
ioremap (unsigned long phys_addr, unsigned long size)
{
void __iomem *addr;
@@ -102,6 +108,11 @@ ioremap_nocache (unsigned long phys_addr, unsigned long size)
EXPORT_SYMBOL(ioremap_nocache);
void
+early_iounmap (volatile void __iomem *addr, unsigned long size)
+{
+}
+
+void
iounmap (volatile void __iomem *addr)
{
if (REGION_NUMBER(addr) == RGN_GATE)
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 6cdc931f7c17..83aae4747594 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -339,6 +339,35 @@ found:
}
#endif
+#ifdef CONFIG_ACPI_NUMA
+static int __init
+dmar_parse_one_rhsa(struct acpi_dmar_header *header)
+{
+ struct acpi_dmar_rhsa *rhsa;
+ struct dmar_drhd_unit *drhd;
+
+ rhsa = (struct acpi_dmar_rhsa *)header;
+ for_each_drhd_unit(drhd) {
+ if (drhd->reg_base_addr == rhsa->base_address) {
+ int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
+
+ if (!node_online(node))
+ node = -1;
+ drhd->iommu->node = node;
+ return 0;
+ }
+ }
+ WARN(1, "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
+ "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
+ drhd->reg_base_addr,
+ dmi_get_system_info(DMI_BIOS_VENDOR),
+ dmi_get_system_info(DMI_BIOS_VERSION),
+ dmi_get_system_info(DMI_PRODUCT_VERSION));
+
+ return 0;
+}
+#endif
+
static void __init
dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
{
@@ -458,7 +487,9 @@ parse_dmar_table(void)
#endif
break;
case ACPI_DMAR_HARDWARE_AFFINITY:
- /* We don't do anything with RHSA (yet?) */
+#ifdef CONFIG_ACPI_NUMA
+ ret = dmar_parse_one_rhsa(entry_header);
+#endif
break;
default:
printk(KERN_WARNING PREFIX
@@ -582,6 +613,8 @@ int __init dmar_table_init(void)
return 0;
}
+static int bios_warned;
+
int __init check_zero_address(void)
{
struct acpi_table_dmar *dmar;
@@ -601,6 +634,9 @@ int __init check_zero_address(void)
}
if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
+ void __iomem *addr;
+ u64 cap, ecap;
+
drhd = (void *)entry_header;
if (!drhd->address) {
/* Promote an attitude of violence to a BIOS engineer today */
@@ -609,17 +645,40 @@ int __init check_zero_address(void)
dmi_get_system_info(DMI_BIOS_VENDOR),
dmi_get_system_info(DMI_BIOS_VERSION),
dmi_get_system_info(DMI_PRODUCT_VERSION));
-#ifdef CONFIG_DMAR
- dmar_disabled = 1;
-#endif
- return 0;
+ bios_warned = 1;
+ goto failed;
+ }
+
+ addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
+ if (!addr ) {
+ printk("IOMMU: can't validate: %llx\n", drhd->address);
+ goto failed;
+ }
+ cap = dmar_readq(addr + DMAR_CAP_REG);
+ ecap = dmar_readq(addr + DMAR_ECAP_REG);
+ early_iounmap(addr, VTD_PAGE_SIZE);
+ if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
+ /* Promote an attitude of violence to a BIOS engineer today */
+ WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n"
+ "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
+ drhd->address,
+ dmi_get_system_info(DMI_BIOS_VENDOR),
+ dmi_get_system_info(DMI_BIOS_VERSION),
+ dmi_get_system_info(DMI_PRODUCT_VERSION));
+ bios_warned = 1;
+ goto failed;
}
- break;
}
entry_header = ((void *)entry_header + entry_header->length);
}
return 1;
+
+failed:
+#ifdef CONFIG_DMAR
+ dmar_disabled = 1;
+#endif
+ return 0;
}
void __init detect_intel_iommu(void)
@@ -670,6 +729,18 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
int agaw = 0;
int msagaw = 0;
+ if (!drhd->reg_base_addr) {
+ if (!bios_warned) {
+ WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
+ "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
+ dmi_get_system_info(DMI_BIOS_VENDOR),
+ dmi_get_system_info(DMI_BIOS_VERSION),
+ dmi_get_system_info(DMI_PRODUCT_VERSION));
+ bios_warned = 1;
+ }
+ return -EINVAL;
+ }
+
iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
if (!iommu)
return -ENOMEM;
@@ -686,13 +757,16 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
- /* Promote an attitude of violence to a BIOS engineer today */
- WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n"
- "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
- drhd->reg_base_addr,
- dmi_get_system_info(DMI_BIOS_VENDOR),
- dmi_get_system_info(DMI_BIOS_VERSION),
- dmi_get_system_info(DMI_PRODUCT_VERSION));
+ if (!bios_warned) {
+ /* Promote an attitude of violence to a BIOS engineer today */
+ WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n"
+ "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
+ drhd->reg_base_addr,
+ dmi_get_system_info(DMI_BIOS_VENDOR),
+ dmi_get_system_info(DMI_BIOS_VERSION),
+ dmi_get_system_info(DMI_PRODUCT_VERSION));
+ bios_warned = 1;
+ }
goto err_unmap;
}
@@ -715,6 +789,8 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
iommu->agaw = agaw;
iommu->msagaw = msagaw;
+ iommu->node = -1;
+
/* the registers might be more than one page */
map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
cap_max_fault_reg_offset(iommu->cap));
@@ -1056,6 +1132,7 @@ static void __dmar_enable_qi(struct intel_iommu *iommu)
int dmar_enable_qi(struct intel_iommu *iommu)
{
struct q_inval *qi;
+ struct page *desc_page;
if (!ecap_qis(iommu->ecap))
return -ENOENT;
@@ -1072,13 +1149,16 @@ int dmar_enable_qi(struct intel_iommu *iommu)
qi = iommu->qi;
- qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
- if (!qi->desc) {
+
+ desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
+ if (!desc_page) {
kfree(qi);
iommu->qi = 0;
return -ENOMEM;
}
+ qi->desc = page_address(desc_page);
+
qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
if (!qi->desc_status) {
free_page((unsigned long) qi->desc);
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 8d6159426311..e56f9bed6f2b 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -277,6 +277,7 @@ static int hw_pass_through = 1;
struct dmar_domain {
int id; /* domain id */
+ int nid; /* node id */
unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
struct list_head devices; /* all devices' list */
@@ -386,30 +387,14 @@ static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;
-static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
+static inline void *alloc_pgtable_page(int node)
{
- unsigned int flags;
- void *vaddr;
-
- /* trying to avoid low memory issues */
- flags = current->flags & PF_MEMALLOC;
- current->flags |= PF_MEMALLOC;
- vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
- current->flags &= (~PF_MEMALLOC | flags);
- return vaddr;
-}
-
+ struct page *page;
+ void *vaddr = NULL;
-static inline void *alloc_pgtable_page(void)
-{
- unsigned int flags;
- void *vaddr;
-
- /* trying to avoid low memory issues */
- flags = current->flags & PF_MEMALLOC;
- current->flags |= PF_MEMALLOC;
- vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
- current->flags &= (~PF_MEMALLOC | flags);
+ page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
+ if (page)
+ vaddr = page_address(page);
return vaddr;
}
@@ -420,7 +405,7 @@ static inline void free_pgtable_page(void *vaddr)
static inline void *alloc_domain_mem(void)
{
- return iommu_kmem_cache_alloc(iommu_domain_cache);
+ return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}
static void free_domain_mem(void *vaddr)
@@ -430,7 +415,7 @@ static void free_domain_mem(void *vaddr)
static inline void * alloc_devinfo_mem(void)
{
- return iommu_kmem_cache_alloc(iommu_devinfo_cache);
+ return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}
static inline void free_devinfo_mem(void *vaddr)
@@ -440,7 +425,7 @@ static inline void free_devinfo_mem(void *vaddr)
struct iova *alloc_iova_mem(void)
{
- return iommu_kmem_cache_alloc(iommu_iova_cache);
+ return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
}
void free_iova_mem(struct iova *iova)
@@ -589,7 +574,8 @@ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
root = &iommu->root_entry[bus];
context = get_context_addr_from_root(root);
if (!context) {
- context = (struct context_entry *)alloc_pgtable_page();
+ context = (struct context_entry *)
+ alloc_pgtable_page(iommu->node);
if (!context) {
spin_unlock_irqrestore(&iommu->lock, flags);
return NULL;
@@ -732,7 +718,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
if (!dma_pte_present(pte)) {
uint64_t pteval;
- tmp_page = alloc_pgtable_page();
+ tmp_page = alloc_pgtable_page(domain->nid);
if (!tmp_page)
return NULL;
@@ -868,7 +854,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
struct root_entry *root;
unsigned long flags;
- root = (struct root_entry *)alloc_pgtable_page();
+ root = (struct root_entry *)alloc_pgtable_page(iommu->node);
if (!root)
return -ENOMEM;
@@ -1263,6 +1249,7 @@ static struct dmar_domain *alloc_domain(void)
if (!domain)
return NULL;
+ domain->nid = -1;
memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
domain->flags = 0;
@@ -1420,9 +1407,10 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
domain->iommu_snooping = 0;
domain->iommu_count = 1;
+ domain->nid = iommu->node;
/* always allocate the top pgd */
- domain->pgd = (struct dma_pte *)alloc_pgtable_page();
+ domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
if (!domain->pgd)
return -ENOMEM;
__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
@@ -1523,12 +1511,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
/* Skip top levels of page tables for
* iommu which has less agaw than default.
+ * Unnecessary for PT mode.
*/
- for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
- pgd = phys_to_virt(dma_pte_addr(pgd));
- if (!dma_pte_present(pgd)) {
- spin_unlock_irqrestore(&iommu->lock, flags);
- return -ENOMEM;
+ if (translation != CONTEXT_TT_PASS_THROUGH) {
+ for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
+ pgd = phys_to_virt(dma_pte_addr(pgd));
+ if (!dma_pte_present(pgd)) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return -ENOMEM;
+ }
}
}
}
@@ -1577,6 +1568,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
spin_lock_irqsave(&domain->iommu_lock, flags);
if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
domain->iommu_count++;
+ if (domain->iommu_count == 1)
+ domain->nid = iommu->node;
domain_update_iommu_cap(domain);
}
spin_unlock_irqrestore(&domain->iommu_lock, flags);
@@ -1991,6 +1984,16 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,
"IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
pci_name(pdev), start, end);
+ if (end < start) {
+ WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
+ "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
+ dmi_get_system_info(DMI_BIOS_VENDOR),
+ dmi_get_system_info(DMI_BIOS_VERSION),
+ dmi_get_system_info(DMI_PRODUCT_VERSION));
+ ret = -EIO;
+ goto error;
+ }
+
if (end >> agaw_to_width(domain->agaw)) {
WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
@@ -3228,6 +3231,9 @@ static int device_notifier(struct notifier_block *nb,
struct pci_dev *pdev = to_pci_dev(dev);
struct dmar_domain *domain;
+ if (iommu_no_mapping(dev))
+ return 0;
+
domain = find_domain(pdev);
if (!domain)
return 0;
@@ -3455,6 +3461,7 @@ static struct dmar_domain *iommu_alloc_vm_domain(void)
return NULL;
domain->id = vm_domid++;
+ domain->nid = -1;
memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
@@ -3481,9 +3488,10 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
domain->iommu_coherency = 0;
domain->iommu_snooping = 0;
domain->max_addr = 0;
+ domain->nid = -1;
/* always allocate the top pgd */
- domain->pgd = (struct dma_pte *)alloc_pgtable_page();
+ domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
if (!domain->pgd)
return -ENOMEM;
domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 1487bf2be863..8b65a489581b 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -590,7 +590,8 @@ static int setup_intr_remapping(struct intel_iommu *iommu, int mode)
if (!iommu->ir_table)
return -ENOMEM;
- pages = alloc_pages(GFP_ATOMIC | __GFP_ZERO, INTR_REMAP_PAGE_ORDER);
+ pages = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
+ INTR_REMAP_PAGE_ORDER);
if (!pages) {
printk(KERN_ERR "failed to allocate pages of order %d\n",
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 4f0a72a9740c..9310c699a37d 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -332,6 +332,7 @@ struct intel_iommu {
#ifdef CONFIG_INTR_REMAP
struct ir_table *ir_table; /* Interrupt remapping info */
#endif
+ int node;
};
static inline void __iommu_flush_cache(