summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-29 20:51:03 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-29 20:51:03 -0700
commitd35ac6ac0e80e55bcea79af18d935f19a3e8554c (patch)
tree84281d7724f5e67c2c0a599dd9ffdeccb96bce9f
parent0b26eadbf200abf6c97c6d870286c73219cdac65 (diff)
parenta7a334076dd725b8c3b5d64f68e3992ffcfd1d25 (diff)
downloadlwn-d35ac6ac0e80e55bcea79af18d935f19a3e8554c.tar.gz
lwn-d35ac6ac0e80e55bcea79af18d935f19a3e8554c.zip
Merge tag 'iommu-updates-v6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull iommu updates from Joerg Roedel: "Core changes: - iova_magazine_alloc() optimization - Make flush-queue an IOMMU driver capability - Consolidate the error handling around device attachment AMD IOMMU changes: - AVIC Interrupt Remapping Improvements - Some minor fixes and cleanups Intel VT-d changes from Lu Baolu: - Small and misc cleanups ARM-SMMU changes from Will Deacon: - Device-tree binding updates: - Add missing clocks for SC8280XP and SA8775 Adreno SMMUs - Add two new Qualcomm SMMUs in SDX75 and SM6375 - Workarounds for Arm MMU-700 errata: - 1076982: Avoid use of SEV-based cmdq wakeup - 2812531: Terminate command batches with a CMD_SYNC - Enforce single-stage translation to avoid nesting-related errata - Set the correct level hint for range TLB invalidation on teardown .. and some other minor fixes and cleanups (including Freescale PAMU and virtio-iommu changes)" * tag 'iommu-updates-v6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (50 commits) iommu/vt-d: Remove commented-out code iommu/vt-d: Remove two WARN_ON in domain_context_mapping_one() iommu/vt-d: Handle the failure case of dmar_reenable_qi() iommu/vt-d: Remove unnecessary (void*) conversions iommu/amd: Remove extern from function prototypes iommu/amd: Use BIT/BIT_ULL macro to define bit fields iommu/amd: Fix DTE_IRQ_PHYS_ADDR_MASK macro iommu/amd: Fix compile error for unused function iommu/amd: Improving Interrupt Remapping Table Invalidation iommu/amd: Do not Invalidate IRT when IRTE caching is disabled iommu/amd: Introduce Disable IRTE Caching Support iommu/amd: Remove the unused struct amd_ir_data.ref iommu/amd: Switch amd_iommu_update_ga() to use modify_irte_ga() iommu/arm-smmu-v3: Set TTL invalidation hint better iommu/arm-smmu-v3: Document nesting-related errata iommu/arm-smmu-v3: Add explicit feature for nesting iommu/arm-smmu-v3: Document MMU-700 erratum 2812531 iommu/arm-smmu-v3: Work around MMU-600 erratum 1076982 dt-bindings: arm-smmu: Add SDX75 SMMU compatible dt-bindings: arm-smmu: Add SM6375 GPU SMMU ...
-rw-r--r--.clang-format1
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt1
-rw-r--r--Documentation/arch/arm64/silicon-errata.rst4
-rw-r--r--Documentation/devicetree/bindings/iommu/arm,smmu.yaml12
-rw-r--r--arch/powerpc/sysdev/fsl_pci.c1
-rw-r--r--drivers/iommu/amd/amd_iommu.h91
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h87
-rw-r--r--drivers/iommu/amd/init.c38
-rw-r--r--drivers/iommu/amd/io_pgtable.c4
-rw-r--r--drivers/iommu/amd/io_pgtable_v2.c2
-rw-r--r--drivers/iommu/amd/iommu.c99
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c61
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h8
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu.c4
-rw-r--r--drivers/iommu/dma-iommu.c3
-rw-r--r--drivers/iommu/fsl_pamu_domain.c113
-rw-r--r--drivers/iommu/intel/iommu.c29
-rw-r--r--drivers/iommu/iommu.c694
-rw-r--r--drivers/iommu/iova.c8
-rw-r--r--drivers/iommu/virtio-iommu.c57
-rw-r--r--include/linux/iommu.h6
21 files changed, 715 insertions, 608 deletions
diff --git a/.clang-format b/.clang-format
index 0d1ed8776733..0bbb1991defe 100644
--- a/.clang-format
+++ b/.clang-format
@@ -254,6 +254,7 @@ ForEachMacros:
- 'for_each_free_mem_range'
- 'for_each_free_mem_range_reverse'
- 'for_each_func_rsrc'
+ - 'for_each_group_device'
- 'for_each_group_evsel'
- 'for_each_group_member'
- 'for_each_hstate'
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index b0dd367dfb59..44bcaf791ce6 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -323,6 +323,7 @@
option with care.
pgtbl_v1 - Use v1 page table for DMA-API (Default).
pgtbl_v2 - Use v2 page table for DMA-API.
+ irtcachedis - Disable Interrupt Remapping Table (IRT) caching.
amd_iommu_dump= [HW,X86-64]
Enable AMD IOMMU driver option to dump the ACPI table
diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
index d6430ade349d..f093a9d8bc5c 100644
--- a/Documentation/arch/arm64/silicon-errata.rst
+++ b/Documentation/arch/arm64/silicon-errata.rst
@@ -140,6 +140,10 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | MMU-500 | #841119,826419 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | MMU-600 | #1076982,1209401| N/A |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM | MMU-700 | #2268618,2812531| N/A |
++----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
| Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 |
+----------------+-----------------+-----------------+-----------------------------+
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index ba677d401e24..3a31a979709b 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -29,6 +29,7 @@ properties:
- qcom,msm8996-smmu-v2
- qcom,msm8998-smmu-v2
- qcom,sdm630-smmu-v2
+ - qcom,sm6375-smmu-v2
- const: qcom,smmu-v2
- description: Qcom SoCs implementing "qcom,smmu-500" and "arm,mmu-500"
@@ -45,6 +46,7 @@ properties:
- qcom,sdm845-smmu-500
- qcom,sdx55-smmu-500
- qcom,sdx65-smmu-500
+ - qcom,sdx75-smmu-500
- qcom,sm6115-smmu-500
- qcom,sm6125-smmu-500
- qcom,sm6350-smmu-500
@@ -79,7 +81,9 @@ properties:
- description: Qcom Adreno GPUs implementing "qcom,smmu-500" and "arm,mmu-500"
items:
- enum:
+ - qcom,sa8775p-smmu-500
- qcom,sc7280-smmu-500
+ - qcom,sc8280xp-smmu-500
- qcom,sm6115-smmu-500
- qcom,sm6125-smmu-500
- qcom,sm8150-smmu-500
@@ -267,6 +271,7 @@ allOf:
enum:
- qcom,msm8998-smmu-v2
- qcom,sdm630-smmu-v2
+ - qcom,sm6375-smmu-v2
then:
anyOf:
- properties:
@@ -331,7 +336,10 @@ allOf:
properties:
compatible:
contains:
- const: qcom,sc7280-smmu-500
+ enum:
+ - qcom,sa8775p-smmu-500
+ - qcom,sc7280-smmu-500
+ - qcom,sc8280xp-smmu-500
then:
properties:
clock-names:
@@ -413,10 +421,8 @@ allOf:
- nvidia,smmu-500
- qcom,qcm2290-smmu-500
- qcom,qdu1000-smmu-500
- - qcom,sa8775p-smmu-500
- qcom,sc7180-smmu-500
- qcom,sc8180x-smmu-500
- - qcom,sc8280xp-smmu-500
- qcom,sdm670-smmu-500
- qcom,sdm845-smmu-500
- qcom,sdx55-smmu-500
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index b7232c46b244..6daf620b63a4 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -1353,6 +1353,7 @@ static struct platform_driver fsl_pci_driver = {
.of_match_table = pci_ids,
},
.probe = fsl_pci_probe,
+ .driver_managed_dma = true,
};
static int __init fsl_pci_init(void)
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 9beeceb9d825..0c35018239ce 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -11,12 +11,15 @@
#include "amd_iommu_types.h"
-extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
-extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
-extern void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid);
-extern void amd_iommu_restart_event_logging(struct amd_iommu *iommu);
-extern void amd_iommu_restart_ga_log(struct amd_iommu *iommu);
-extern void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid);
+irqreturn_t amd_iommu_int_thread(int irq, void *data);
+irqreturn_t amd_iommu_int_handler(int irq, void *data);
+void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid);
+void amd_iommu_restart_event_logging(struct amd_iommu *iommu);
+void amd_iommu_restart_ga_log(struct amd_iommu *iommu);
+int amd_iommu_init_devices(void);
+void amd_iommu_uninit_devices(void);
+void amd_iommu_init_notifier(void);
+void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid);
#ifdef CONFIG_AMD_IOMMU_DEBUGFS
void amd_iommu_debugfs_setup(struct amd_iommu *iommu);
@@ -25,11 +28,11 @@ static inline void amd_iommu_debugfs_setup(struct amd_iommu *iommu) {}
#endif
/* Needed for interrupt remapping */
-extern int amd_iommu_prepare(void);
-extern int amd_iommu_enable(void);
-extern void amd_iommu_disable(void);
-extern int amd_iommu_reenable(int);
-extern int amd_iommu_enable_faulting(void);
+int amd_iommu_prepare(void);
+int amd_iommu_enable(void);
+void amd_iommu_disable(void);
+int amd_iommu_reenable(int mode);
+int amd_iommu_enable_faulting(void);
extern int amd_iommu_guest_ir;
extern enum io_pgtable_fmt amd_iommu_pgtable;
extern int amd_iommu_gpt_level;
@@ -37,33 +40,32 @@ extern int amd_iommu_gpt_level;
/* IOMMUv2 specific functions */
struct iommu_domain;
-extern bool amd_iommu_v2_supported(void);
-extern struct amd_iommu *get_amd_iommu(unsigned int idx);
-extern u8 amd_iommu_pc_get_max_banks(unsigned int idx);
-extern bool amd_iommu_pc_supported(void);
-extern u8 amd_iommu_pc_get_max_counters(unsigned int idx);
-extern int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
- u8 fxn, u64 *value);
-extern int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
- u8 fxn, u64 *value);
-
-extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
-extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
-extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
-extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
-extern int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid,
- u64 address);
-extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
-extern void amd_iommu_domain_update(struct protection_domain *domain);
-extern void amd_iommu_domain_flush_complete(struct protection_domain *domain);
-extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain);
-extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid);
-extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
- unsigned long cr3);
-extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid);
+bool amd_iommu_v2_supported(void);
+struct amd_iommu *get_amd_iommu(unsigned int idx);
+u8 amd_iommu_pc_get_max_banks(unsigned int idx);
+bool amd_iommu_pc_supported(void);
+u8 amd_iommu_pc_get_max_counters(unsigned int idx);
+int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+ u8 fxn, u64 *value);
+int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+ u8 fxn, u64 *value);
+
+int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
+int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
+void amd_iommu_domain_direct_map(struct iommu_domain *dom);
+int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
+int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address);
+void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
+void amd_iommu_domain_update(struct protection_domain *domain);
+void amd_iommu_domain_flush_complete(struct protection_domain *domain);
+void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain);
+int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid);
+int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
+ unsigned long cr3);
+int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid);
#ifdef CONFIG_IRQ_REMAP
-extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
+int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
#else
static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
{
@@ -75,8 +77,8 @@ static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
#define PPR_INVALID 0x1
#define PPR_FAILURE 0xf
-extern int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
- int status, int tag);
+int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
+ int status, int tag);
static inline bool is_rd890_iommu(struct pci_dev *pdev)
{
@@ -129,10 +131,9 @@ static inline void *alloc_pgtable_page(int nid, gfp_t gfp)
return page ? page_address(page) : NULL;
}
-extern bool translation_pre_enabled(struct amd_iommu *iommu);
-extern bool amd_iommu_is_attach_deferred(struct device *dev);
-extern int __init add_special_device(u8 type, u8 id, u32 *devid,
- bool cmd_line);
+bool translation_pre_enabled(struct amd_iommu *iommu);
+bool amd_iommu_is_attach_deferred(struct device *dev);
+int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line);
#ifdef CONFIG_DMI
void amd_iommu_apply_ivrs_quirks(void);
@@ -140,9 +141,9 @@ void amd_iommu_apply_ivrs_quirks(void);
static inline void amd_iommu_apply_ivrs_quirks(void) { }
#endif
-extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
- u64 *root, int mode);
-extern struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
+void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
+ u64 *root, int mode);
+struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
extern u64 amd_iommu_efr;
extern u64 amd_iommu_efr2;
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index ab8aa8f77cc4..dc1db6167927 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -84,21 +84,21 @@
/* Extended Feature Bits */
-#define FEATURE_PREFETCH (1ULL<<0)
-#define FEATURE_PPR (1ULL<<1)
-#define FEATURE_X2APIC (1ULL<<2)
-#define FEATURE_NX (1ULL<<3)
-#define FEATURE_GT (1ULL<<4)
-#define FEATURE_IA (1ULL<<6)
-#define FEATURE_GA (1ULL<<7)
-#define FEATURE_HE (1ULL<<8)
-#define FEATURE_PC (1ULL<<9)
+#define FEATURE_PREFETCH BIT_ULL(0)
+#define FEATURE_PPR BIT_ULL(1)
+#define FEATURE_X2APIC BIT_ULL(2)
+#define FEATURE_NX BIT_ULL(3)
+#define FEATURE_GT BIT_ULL(4)
+#define FEATURE_IA BIT_ULL(6)
+#define FEATURE_GA BIT_ULL(7)
+#define FEATURE_HE BIT_ULL(8)
+#define FEATURE_PC BIT_ULL(9)
#define FEATURE_GATS_SHIFT (12)
#define FEATURE_GATS_MASK (3ULL)
-#define FEATURE_GAM_VAPIC (1ULL<<21)
-#define FEATURE_GIOSUP (1ULL<<48)
-#define FEATURE_EPHSUP (1ULL<<50)
-#define FEATURE_SNP (1ULL<<63)
+#define FEATURE_GAM_VAPIC BIT_ULL(21)
+#define FEATURE_GIOSUP BIT_ULL(48)
+#define FEATURE_EPHSUP BIT_ULL(50)
+#define FEATURE_SNP BIT_ULL(63)
#define FEATURE_PASID_SHIFT 32
#define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT)
@@ -120,13 +120,13 @@
#define PASID_MASK 0x0000ffff
/* MMIO status bits */
-#define MMIO_STATUS_EVT_OVERFLOW_INT_MASK (1 << 0)
-#define MMIO_STATUS_EVT_INT_MASK (1 << 1)
-#define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2)
-#define MMIO_STATUS_PPR_INT_MASK (1 << 6)
-#define MMIO_STATUS_GALOG_RUN_MASK (1 << 8)
-#define MMIO_STATUS_GALOG_OVERFLOW_MASK (1 << 9)
-#define MMIO_STATUS_GALOG_INT_MASK (1 << 10)
+#define MMIO_STATUS_EVT_OVERFLOW_INT_MASK BIT(0)
+#define MMIO_STATUS_EVT_INT_MASK BIT(1)
+#define MMIO_STATUS_COM_WAIT_INT_MASK BIT(2)
+#define MMIO_STATUS_PPR_INT_MASK BIT(6)
+#define MMIO_STATUS_GALOG_RUN_MASK BIT(8)
+#define MMIO_STATUS_GALOG_OVERFLOW_MASK BIT(9)
+#define MMIO_STATUS_GALOG_INT_MASK BIT(10)
/* event logging constants */
#define EVENT_ENTRY_SIZE 0x10
@@ -174,6 +174,7 @@
#define CONTROL_GAINT_EN 29
#define CONTROL_XT_EN 50
#define CONTROL_INTCAPXT_EN 51
+#define CONTROL_IRTCACHEDIS 59
#define CONTROL_SNPAVIC_EN 61
#define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT)
@@ -283,7 +284,7 @@
#define AMD_IOMMU_PGSIZES_V2 (PAGE_SIZE | (1ULL << 21) | (1ULL << 30))
/* Bit value definition for dte irq remapping fields*/
-#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
+#define DTE_IRQ_PHYS_ADDR_MASK GENMASK_ULL(51, 6)
#define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60)
#define DTE_IRQ_REMAP_INTCTL (2ULL << 60)
#define DTE_IRQ_REMAP_ENABLE 1ULL
@@ -369,23 +370,23 @@
/*
* Bit value definition for I/O PTE fields
*/
-#define IOMMU_PTE_PR (1ULL << 0)
-#define IOMMU_PTE_U (1ULL << 59)
-#define IOMMU_PTE_FC (1ULL << 60)
-#define IOMMU_PTE_IR (1ULL << 61)
-#define IOMMU_PTE_IW (1ULL << 62)
+#define IOMMU_PTE_PR BIT_ULL(0)
+#define IOMMU_PTE_U BIT_ULL(59)
+#define IOMMU_PTE_FC BIT_ULL(60)
+#define IOMMU_PTE_IR BIT_ULL(61)
+#define IOMMU_PTE_IW BIT_ULL(62)
/*
* Bit value definition for DTE fields
*/
-#define DTE_FLAG_V (1ULL << 0)
-#define DTE_FLAG_TV (1ULL << 1)
-#define DTE_FLAG_IR (1ULL << 61)
-#define DTE_FLAG_IW (1ULL << 62)
-
-#define DTE_FLAG_IOTLB (1ULL << 32)
-#define DTE_FLAG_GIOV (1ULL << 54)
-#define DTE_FLAG_GV (1ULL << 55)
+#define DTE_FLAG_V BIT_ULL(0)
+#define DTE_FLAG_TV BIT_ULL(1)
+#define DTE_FLAG_IR BIT_ULL(61)
+#define DTE_FLAG_IW BIT_ULL(62)
+
+#define DTE_FLAG_IOTLB BIT_ULL(32)
+#define DTE_FLAG_GIOV BIT_ULL(54)
+#define DTE_FLAG_GV BIT_ULL(55)
#define DTE_FLAG_MASK (0x3ffULL << 32)
#define DTE_GLX_SHIFT (56)
#define DTE_GLX_MASK (3)
@@ -439,13 +440,13 @@
#define MAX_DOMAIN_ID 65536
/* Protection domain flags */
-#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
-#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
+#define PD_DMA_OPS_MASK BIT(0) /* domain used for dma_ops */
+#define PD_DEFAULT_MASK BIT(1) /* domain is a default dma_ops
domain for an IOMMU */
-#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page
+#define PD_PASSTHROUGH_MASK BIT(2) /* domain has no page
translation */
-#define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */
-#define PD_GIOV_MASK (1UL << 4) /* domain enable GIOV support */
+#define PD_IOMMUV2_MASK BIT(3) /* domain has gcr3 table */
+#define PD_GIOV_MASK BIT(4) /* domain enable GIOV support */
extern bool amd_iommu_dump;
#define DUMP_printk(format, arg...) \
@@ -716,6 +717,9 @@ struct amd_iommu {
/* if one, we need to send a completion wait command */
bool need_sync;
+ /* true if disable irte caching */
+ bool irtcachedis_enabled;
+
/* Handle for IOMMU core code */
struct iommu_device iommu;
@@ -748,7 +752,7 @@ struct amd_iommu {
u32 flags;
volatile u64 *cmd_sem;
- u64 cmd_sem_val;
+ atomic64_t cmd_sem_val;
#ifdef CONFIG_AMD_IOMMU_DEBUGFS
/* DebugFS Info */
@@ -882,7 +886,7 @@ extern int amd_iommu_max_glx_val;
* This function flushes all internal caches of
* the IOMMU used by this driver.
*/
-extern void iommu_flush_all_caches(struct amd_iommu *iommu);
+void iommu_flush_all_caches(struct amd_iommu *iommu);
static inline int get_ioapic_devid(int id)
{
@@ -1006,7 +1010,6 @@ struct amd_ir_data {
struct irq_2_irte irq_2_irte;
struct msi_msg msi_entry;
void *entry; /* Pointer to union irte or struct irte_ga */
- void *ref; /* Pointer to the actual irte */
/**
* Store information for activate/de-activate
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index c2d80a4e5fb0..ea0f1ab94178 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -162,6 +162,7 @@ static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
static bool amd_iommu_detected;
static bool amd_iommu_disabled __initdata;
static bool amd_iommu_force_enable __initdata;
+static bool amd_iommu_irtcachedis;
static int amd_iommu_target_ivhd_type;
/* Global EFR and EFR2 registers */
@@ -484,6 +485,9 @@ static void iommu_disable(struct amd_iommu *iommu)
/* Disable IOMMU hardware itself */
iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
+
+ /* Clear IRTE cache disabling bit */
+ iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
}
/*
@@ -1753,7 +1757,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
iommu->pci_seg = pci_seg;
raw_spin_lock_init(&iommu->lock);
- iommu->cmd_sem_val = 0;
+ atomic64_set(&iommu->cmd_sem_val, 0);
/* Add IOMMU to internal data structures */
list_add_tail(&iommu->list, &amd_iommu_list);
@@ -2710,6 +2714,33 @@ static void iommu_enable_ga(struct amd_iommu *iommu)
#endif
}
+static void iommu_disable_irtcachedis(struct amd_iommu *iommu)
+{
+ iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
+}
+
+static void iommu_enable_irtcachedis(struct amd_iommu *iommu)
+{
+ u64 ctrl;
+
+ if (!amd_iommu_irtcachedis)
+ return;
+
+ /*
+ * Note:
+ * The support for IRTCacheDis feature is dertermined by
+ * checking if the bit is writable.
+ */
+ iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS);
+ ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
+ ctrl &= (1ULL << CONTROL_IRTCACHEDIS);
+ if (ctrl)
+ iommu->irtcachedis_enabled = true;
+ pr_info("iommu%d (%#06x) : IRT cache is %s\n",
+ iommu->index, iommu->devid,
+ iommu->irtcachedis_enabled ? "disabled" : "enabled");
+}
+
static void early_enable_iommu(struct amd_iommu *iommu)
{
iommu_disable(iommu);
@@ -2720,6 +2751,7 @@ static void early_enable_iommu(struct amd_iommu *iommu)
iommu_set_exclusion_range(iommu);
iommu_enable_ga(iommu);
iommu_enable_xt(iommu);
+ iommu_enable_irtcachedis(iommu);
iommu_enable(iommu);
iommu_flush_all_caches(iommu);
}
@@ -2770,10 +2802,12 @@ static void early_enable_iommus(void)
for_each_iommu(iommu) {
iommu_disable_command_buffer(iommu);
iommu_disable_event_buffer(iommu);
+ iommu_disable_irtcachedis(iommu);
iommu_enable_command_buffer(iommu);
iommu_enable_event_buffer(iommu);
iommu_enable_ga(iommu);
iommu_enable_xt(iommu);
+ iommu_enable_irtcachedis(iommu);
iommu_set_device_table(iommu);
iommu_flush_all_caches(iommu);
}
@@ -3426,6 +3460,8 @@ static int __init parse_amd_iommu_options(char *str)
amd_iommu_pgtable = AMD_IOMMU_V1;
} else if (strncmp(str, "pgtbl_v2", 8) == 0) {
amd_iommu_pgtable = AMD_IOMMU_V2;
+ } else if (strncmp(str, "irtcachedis", 11) == 0) {
+ amd_iommu_irtcachedis = true;
} else {
pr_notice("Unknown option - '%s'\n", str);
}
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 1b67116882be..2892aa1b4dc1 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -310,8 +310,8 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
return NULL;
/* Large PTE */
- if (PM_PTE_LEVEL(*pte) == 7 ||
- PM_PTE_LEVEL(*pte) == 0)
+ if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL ||
+ PM_PTE_LEVEL(*pte) == PAGE_MODE_NONE)
break;
/* No level skipping support yet */
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
index 27c3015947e6..e9ef2e0a62f6 100644
--- a/drivers/iommu/amd/io_pgtable_v2.c
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -2,7 +2,7 @@
/*
* CPU-agnostic AMD IO page table v2 allocator.
*
- * Copyright (C) 2022 Advanced Micro Devices, Inc.
+ * Copyright (C) 2022, 2023 Advanced Micro Devices, Inc.
* Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
* Author: Vasant Hegde <vasant.hegde@amd.com>
*/
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 9ea40960978b..c3b58a8389b9 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1182,11 +1182,11 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
if (!iommu->need_sync)
return 0;
- raw_spin_lock_irqsave(&iommu->lock, flags);
-
- data = ++iommu->cmd_sem_val;
+ data = atomic64_add_return(1, &iommu->cmd_sem_val);
build_completion_wait(&cmd, iommu, data);
+ raw_spin_lock_irqsave(&iommu->lock, flags);
+
ret = __iommu_queue_command_sync(iommu, &cmd, false);
if (ret)
goto out_unlock;
@@ -1273,6 +1273,9 @@ static void amd_iommu_flush_irt_all(struct amd_iommu *iommu)
u32 devid;
u16 last_bdf = iommu->pci_seg->last_bdf;
+ if (iommu->irtcachedis_enabled)
+ return;
+
for (devid = 0; devid <= last_bdf; devid++)
iommu_flush_irt(iommu, devid);
@@ -2313,6 +2316,8 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
return amdr_ivrs_remap_support;
case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
return true;
+ case IOMMU_CAP_DEFERRED_FLUSH:
+ return true;
default:
break;
}
@@ -2822,6 +2827,32 @@ EXPORT_SYMBOL(amd_iommu_device_info);
static struct irq_chip amd_ir_chip;
static DEFINE_SPINLOCK(iommu_table_lock);
+static void iommu_flush_irt_and_complete(struct amd_iommu *iommu, u16 devid)
+{
+ int ret;
+ u64 data;
+ unsigned long flags;
+ struct iommu_cmd cmd, cmd2;
+
+ if (iommu->irtcachedis_enabled)
+ return;
+
+ build_inv_irt(&cmd, devid);
+ data = atomic64_add_return(1, &iommu->cmd_sem_val);
+ build_completion_wait(&cmd2, iommu, data);
+
+ raw_spin_lock_irqsave(&iommu->lock, flags);
+ ret = __iommu_queue_command_sync(iommu, &cmd, true);
+ if (ret)
+ goto out;
+ ret = __iommu_queue_command_sync(iommu, &cmd2, false);
+ if (ret)
+ goto out;
+ wait_on_sem(iommu, data);
+out:
+ raw_spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid,
struct irq_remap_table *table)
{
@@ -3021,7 +3052,7 @@ out:
}
static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
- struct irte_ga *irte, struct amd_ir_data *data)
+ struct irte_ga *irte)
{
struct irq_remap_table *table;
struct irte_ga *entry;
@@ -3046,13 +3077,9 @@ static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
old = entry->irte;
WARN_ON(!try_cmpxchg128(&entry->irte, &old, irte->irte));
- if (data)
- data->ref = entry;
-
raw_spin_unlock_irqrestore(&table->lock, flags);
- iommu_flush_irt(iommu, devid);
- iommu_completion_wait(iommu);
+ iommu_flush_irt_and_complete(iommu, devid);
return 0;
}
@@ -3071,8 +3098,7 @@ static int modify_irte(struct amd_iommu *iommu,
table->table[index] = irte->val;
raw_spin_unlock_irqrestore(&table->lock, flags);
- iommu_flush_irt(iommu, devid);
- iommu_completion_wait(iommu);
+ iommu_flush_irt_and_complete(iommu, devid);
return 0;
}
@@ -3090,8 +3116,7 @@ static void free_irte(struct amd_iommu *iommu, u16 devid, int index)
iommu->irte_ops->clear_allocated(table, index);
raw_spin_unlock_irqrestore(&table->lock, flags);
- iommu_flush_irt(iommu, devid);
- iommu_completion_wait(iommu);
+ iommu_flush_irt_and_complete(iommu, devid);
}
static void irte_prepare(void *entry,
@@ -3137,7 +3162,7 @@ static void irte_ga_activate(struct amd_iommu *iommu, void *entry, u16 devid, u1
struct irte_ga *irte = (struct irte_ga *) entry;
irte->lo.fields_remap.valid = 1;
- modify_irte_ga(iommu, devid, index, irte, NULL);
+ modify_irte_ga(iommu, devid, index, irte);
}
static void irte_deactivate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index)
@@ -3153,7 +3178,7 @@ static void irte_ga_deactivate(struct amd_iommu *iommu, void *entry, u16 devid,
struct irte_ga *irte = (struct irte_ga *) entry;
irte->lo.fields_remap.valid = 0;
- modify_irte_ga(iommu, devid, index, irte, NULL);
+ modify_irte_ga(iommu, devid, index, irte);
}
static void irte_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid, u16 index,
@@ -3177,7 +3202,7 @@ static void irte_ga_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid
APICID_TO_IRTE_DEST_LO(dest_apicid);
irte->hi.fields.destination =
APICID_TO_IRTE_DEST_HI(dest_apicid);
- modify_irte_ga(iommu, devid, index, irte, NULL);
+ modify_irte_ga(iommu, devid, index, irte);
}
}
@@ -3527,7 +3552,7 @@ int amd_iommu_activate_guest_mode(void *data)
entry->lo.fields_vapic.ga_tag = ir_data->ga_tag;
return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
- ir_data->irq_2_irte.index, entry, ir_data);
+ ir_data->irq_2_irte.index, entry);
}
EXPORT_SYMBOL(amd_iommu_activate_guest_mode);
@@ -3557,7 +3582,7 @@ int amd_iommu_deactivate_guest_mode(void *data)
APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);
return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
- ir_data->irq_2_irte.index, entry, ir_data);
+ ir_data->irq_2_irte.index, entry);
}
EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode);
@@ -3719,44 +3744,26 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
int amd_iommu_update_ga(int cpu, bool is_run, void *data)
{
- unsigned long flags;
- struct amd_iommu *iommu;
- struct irq_remap_table *table;
struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
- int devid = ir_data->irq_2_irte.devid;
struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
- struct irte_ga *ref = (struct irte_ga *) ir_data->ref;
if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
- !ref || !entry || !entry->lo.fields_vapic.guest_mode)
+ !entry || !entry->lo.fields_vapic.guest_mode)
return 0;
- iommu = ir_data->iommu;
- if (!iommu)
+ if (!ir_data->iommu)
return -ENODEV;
- table = get_irq_table(iommu, devid);
- if (!table)
- return -ENODEV;
-
- raw_spin_lock_irqsave(&table->lock, flags);
-
- if (ref->lo.fields_vapic.guest_mode) {
- if (cpu >= 0) {
- ref->lo.fields_vapic.destination =
- APICID_TO_IRTE_DEST_LO(cpu);
- ref->hi.fields.destination =
- APICID_TO_IRTE_DEST_HI(cpu);
- }
- ref->lo.fields_vapic.is_run = is_run;
- barrier();
+ if (cpu >= 0) {
+ entry->lo.fields_vapic.destination =
+ APICID_TO_IRTE_DEST_LO(cpu);
+ entry->hi.fields.destination =
+ APICID_TO_IRTE_DEST_HI(cpu);
}
+ entry->lo.fields_vapic.is_run = is_run;
- raw_spin_unlock_irqrestore(&table->lock, flags);
-
- iommu_flush_irt(iommu, devid);
- iommu_completion_wait(iommu);
- return 0;
+ return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
+ ir_data->irq_2_irte.index, entry);
}
EXPORT_SYMBOL(amd_iommu_update_ga);
#endif
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 3fd83fb75722..9b0dc3505601 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -894,6 +894,12 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
{
int index;
+ if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
+ (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
+ arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
+ cmds->num = 0;
+ }
+
if (cmds->num == CMDQ_BATCH_ENTRIES) {
arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
cmds->num = 0;
@@ -1892,8 +1898,13 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
/* Convert page size of 12,14,16 (log2) to 1,2,3 */
cmd->tlbi.tg = (tg - 10) / 2;
- /* Determine what level the granule is at */
- cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
+ /*
+ * Determine what level the granule is at. For non-leaf, io-pgtable
+ * assumes .tlb_flush_walk can invalidate multiple levels at once,
+ * so ignore the nominal last-level granule and leave TTL=0.
+ */
+ if (cmd->tlbi.leaf)
+ cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
num_pages = size >> tg;
}
@@ -2008,6 +2019,7 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
/* Assume that a coherent TCU implies coherent TBUs */
return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
case IOMMU_CAP_NOEXEC:
+ case IOMMU_CAP_DEFERRED_FLUSH:
return true;
default:
return false;
@@ -2023,7 +2035,6 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
if (type != IOMMU_DOMAIN_UNMANAGED &&
type != IOMMU_DOMAIN_DMA &&
- type != IOMMU_DOMAIN_DMA_FQ &&
type != IOMMU_DOMAIN_IDENTITY)
return NULL;
@@ -3429,6 +3440,44 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
return 0;
}
+#define IIDR_IMPLEMENTER_ARM 0x43b
+#define IIDR_PRODUCTID_ARM_MMU_600 0x483
+#define IIDR_PRODUCTID_ARM_MMU_700 0x487
+
+static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
+{
+ u32 reg;
+ unsigned int implementer, productid, variant, revision;
+
+ reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
+ implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
+ productid = FIELD_GET(IIDR_PRODUCTID, reg);
+ variant = FIELD_GET(IIDR_VARIANT, reg);
+ revision = FIELD_GET(IIDR_REVISION, reg);
+
+ switch (implementer) {
+ case IIDR_IMPLEMENTER_ARM:
+ switch (productid) {
+ case IIDR_PRODUCTID_ARM_MMU_600:
+ /* Arm erratum 1076982 */
+ if (variant == 0 && revision <= 2)
+ smmu->features &= ~ARM_SMMU_FEAT_SEV;
+ /* Arm erratum 1209401 */
+ if (variant < 2)
+ smmu->features &= ~ARM_SMMU_FEAT_NESTING;
+ break;
+ case IIDR_PRODUCTID_ARM_MMU_700:
+ /* Arm erratum 2812531 */
+ smmu->features &= ~ARM_SMMU_FEAT_BTM;
+ smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
+ /* Arm errata 2268618, 2812531 */
+ smmu->features &= ~ARM_SMMU_FEAT_NESTING;
+ break;
+ }
+ break;
+ }
+}
+
static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
{
u32 reg;
@@ -3635,6 +3684,12 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
smmu->ias = max(smmu->ias, smmu->oas);
+ if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
+ (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
+ smmu->features |= ARM_SMMU_FEAT_NESTING;
+
+ arm_smmu_device_iidr_probe(smmu);
+
if (arm_smmu_sva_supported(smmu))
smmu->features |= ARM_SMMU_FEAT_SVA;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index b574c58a3487..dcab85698a4e 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -69,6 +69,12 @@
#define IDR5_VAX GENMASK(11, 10)
#define IDR5_VAX_52_BIT 1
+#define ARM_SMMU_IIDR 0x18
+#define IIDR_PRODUCTID GENMASK(31, 20)
+#define IIDR_VARIANT GENMASK(19, 16)
+#define IIDR_REVISION GENMASK(15, 12)
+#define IIDR_IMPLEMENTER GENMASK(11, 0)
+
#define ARM_SMMU_CR0 0x20
#define CR0_ATSCHK (1 << 4)
#define CR0_CMDQEN (1 << 3)
@@ -639,11 +645,13 @@ struct arm_smmu_device {
#define ARM_SMMU_FEAT_BTM (1 << 16)
#define ARM_SMMU_FEAT_SVA (1 << 17)
#define ARM_SMMU_FEAT_E2H (1 << 18)
+#define ARM_SMMU_FEAT_NESTING (1 << 19)
u32 features;
#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
#define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
#define ARM_SMMU_OPT_MSIPOLL (1 << 2)
+#define ARM_SMMU_OPT_CMDQ_FORCE_SYNC (1 << 3)
u32 options;
struct arm_smmu_cmdq cmdq;
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 6e0813b26fb6..a86acd76c1df 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -856,8 +856,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
struct arm_smmu_domain *smmu_domain;
if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_IDENTITY) {
- if (using_legacy_binding ||
- (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_DMA_FQ))
+ if (using_legacy_binding || type != IOMMU_DOMAIN_DMA)
return NULL;
}
/*
@@ -1325,6 +1324,7 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK ||
device_get_dma_attr(dev) == DEV_DMA_COHERENT;
case IOMMU_CAP_NOEXEC:
+ case IOMMU_CAP_DEFERRED_FLUSH:
return true;
default:
return false;
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index e86ae462cade..e57724163835 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -615,7 +615,8 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
goto done_unlock;
/* If the FQ fails we can simply fall back to strict mode */
- if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain))
+ if (domain->type == IOMMU_DOMAIN_DMA_FQ &&
+ (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain)))
domain->type = IOMMU_DOMAIN_DMA;
ret = iova_reserve_iommu_regions(dev, domain);
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index bce372297099..4ac0e247ec2b 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -334,17 +334,6 @@ int fsl_pamu_configure_l1_stash(struct iommu_domain *domain, u32 cpu)
return ret;
}
-static struct iommu_group *get_device_iommu_group(struct device *dev)
-{
- struct iommu_group *group;
-
- group = iommu_group_get(dev);
- if (!group)
- group = iommu_group_alloc();
-
- return group;
-}
-
static bool check_pci_ctl_endpt_part(struct pci_controller *pci_ctl)
{
u32 version;
@@ -356,94 +345,52 @@ static bool check_pci_ctl_endpt_part(struct pci_controller *pci_ctl)
return version >= 0x204;
}
-/* Get iommu group information from peer devices or devices on the parent bus */
-static struct iommu_group *get_shared_pci_device_group(struct pci_dev *pdev)
+static struct iommu_group *fsl_pamu_device_group(struct device *dev)
{
- struct pci_dev *tmp;
struct iommu_group *group;
- struct pci_bus *bus = pdev->bus;
+ struct pci_dev *pdev;
/*
- * Traverese the pci bus device list to get
- * the shared iommu group.
+ * For platform devices we allocate a separate group for each of the
+ * devices.
*/
- while (bus) {
- list_for_each_entry(tmp, &bus->devices, bus_list) {
- if (tmp == pdev)
- continue;
- group = iommu_group_get(&tmp->dev);
- if (group)
- return group;
- }
+ if (!dev_is_pci(dev))
+ return generic_device_group(dev);
- bus = bus->parent;
- }
-
- return NULL;
-}
-
-static struct iommu_group *get_pci_device_group(struct pci_dev *pdev)
-{
- struct pci_controller *pci_ctl;
- bool pci_endpt_partitioning;
- struct iommu_group *group = NULL;
-
- pci_ctl = pci_bus_to_host(pdev->bus);
- pci_endpt_partitioning = check_pci_ctl_endpt_part(pci_ctl);
- /* We can partition PCIe devices so assign device group to the device */
- if (pci_endpt_partitioning) {
- group = pci_device_group(&pdev->dev);
-
- /*
- * PCIe controller is not a paritionable entity
- * free the controller device iommu_group.
- */
- if (pci_ctl->parent->iommu_group)
- iommu_group_remove_device(pci_ctl->parent);
- } else {
- /*
- * All devices connected to the controller will share the
- * PCI controllers device group. If this is the first
- * device to be probed for the pci controller, copy the
- * device group information from the PCI controller device
- * node and remove the PCI controller iommu group.
- * For subsequent devices, the iommu group information can
- * be obtained from sibling devices (i.e. from the bus_devices
- * link list).
- */
- if (pci_ctl->parent->iommu_group) {
- group = get_device_iommu_group(pci_ctl->parent);
- iommu_group_remove_device(pci_ctl->parent);
- } else {
- group = get_shared_pci_device_group(pdev);
- }
- }
-
- if (!group)
- group = ERR_PTR(-ENODEV);
+ /*
+ * We can partition PCIe devices so assign device group to the device
+ */
+ pdev = to_pci_dev(dev);
+ if (check_pci_ctl_endpt_part(pci_bus_to_host(pdev->bus)))
+ return pci_device_group(&pdev->dev);
+ /*
+ * All devices connected to the controller will share the same device
+ * group.
+ *
+ * Due to ordering between fsl_pamu_init() and fsl_pci_init() it is
+ * guaranteed that the pci_ctl->parent platform_device will have the
+ * iommu driver bound and will already have a group set. So we just
+ * re-use this group as the group for every device in the hose.
+ */
+ group = iommu_group_get(pci_bus_to_host(pdev->bus)->parent);
+ if (WARN_ON(!group))
+ return ERR_PTR(-EINVAL);
return group;
}
-static struct iommu_group *fsl_pamu_device_group(struct device *dev)
+static struct iommu_device *fsl_pamu_probe_device(struct device *dev)
{
- struct iommu_group *group = ERR_PTR(-ENODEV);
int len;
/*
- * For platform devices we allocate a separate group for
- * each of the devices.
+ * uboot must fill the fsl,liodn for platform devices to be supported by
+ * the iommu.
*/
- if (dev_is_pci(dev))
- group = get_pci_device_group(to_pci_dev(dev));
- else if (of_get_property(dev->of_node, "fsl,liodn", &len))
- group = get_device_iommu_group(dev);
-
- return group;
-}
+ if (!dev_is_pci(dev) &&
+ !of_get_property(dev->of_node, "fsl,liodn", &len))
+ return ERR_PTR(-ENODEV);
-static struct iommu_device *fsl_pamu_probe_device(struct device *dev)
-{
return &pamu_iommu;
}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index b871a6afd803..5c8c5cdc36cf 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1185,7 +1185,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
struct root_entry *root;
- root = (struct root_entry *)alloc_pgtable_page(iommu->node, GFP_ATOMIC);
+ root = alloc_pgtable_page(iommu->node, GFP_ATOMIC);
if (!root) {
pr_err("Allocating root entry for %s failed\n",
iommu->name);
@@ -1312,15 +1312,7 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
iommu->name, type);
return;
}
- /* Note: set drain read/write */
-#if 0
- /*
- * This is probably to be super secure.. Looks like we can
- * ignore it without any impact.
- */
- if (cap_read_drain(iommu->cap))
- val |= DMA_TLB_READ_DRAIN;
-#endif
+
if (cap_write_drain(iommu->cap))
val |= DMA_TLB_WRITE_DRAIN;
@@ -1897,8 +1889,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
struct context_entry *context;
int ret;
- WARN_ON(did == 0);
-
if (hw_pass_through && domain_type_is_si(domain))
translation = CONTEXT_TT_PASS_THROUGH;
@@ -1944,8 +1934,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
if (sm_supported(iommu)) {
unsigned long pds;
- WARN_ON(!table);
-
/* Setup the PASID DIR pointer: */
pds = context_get_sm_pds(table);
context->lo = (u64)virt_to_phys(table->table) |
@@ -2967,10 +2955,15 @@ static int init_iommu_hw(void)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu = NULL;
+ int ret;
- for_each_active_iommu(iommu, drhd)
- if (iommu->qi)
- dmar_reenable_qi(iommu);
+ for_each_active_iommu(iommu, drhd) {
+ if (iommu->qi) {
+ ret = dmar_reenable_qi(iommu);
+ if (ret)
+ return ret;
+ }
+ }
for_each_iommu(iommu, drhd) {
if (drhd->ignored) {
@@ -4064,7 +4057,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
case IOMMU_DOMAIN_BLOCKED:
return &blocking_domain;
case IOMMU_DOMAIN_DMA:
- case IOMMU_DOMAIN_DMA_FQ:
case IOMMU_DOMAIN_UNMANAGED:
dmar_domain = alloc_domain(type);
if (!dmar_domain) {
@@ -4369,6 +4361,7 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
+ case IOMMU_CAP_DEFERRED_FLUSH:
return true;
case IOMMU_CAP_PRE_BOOT_PROTECTION:
return dmar_platform_optin();
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index eb620552967b..da340f11c5f5 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -68,6 +68,10 @@ struct group_device {
char *name;
};
+/* Iterate over each struct group_device in a struct iommu_group */
+#define for_each_group_device(group, pos) \
+ list_for_each_entry(pos, &(group)->devices, list)
+
struct iommu_group_attribute {
struct attribute attr;
ssize_t (*show)(struct iommu_group *group, char *buf);
@@ -89,17 +93,39 @@ static const char * const iommu_group_resv_type_string[] = {
static int iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data);
static void iommu_release_device(struct device *dev);
-static int iommu_alloc_default_domain(struct iommu_group *group,
- struct device *dev);
static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
unsigned type);
static int __iommu_attach_device(struct iommu_domain *domain,
struct device *dev);
static int __iommu_attach_group(struct iommu_domain *domain,
struct iommu_group *group);
+
+enum {
+ IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0,
+};
+
+static int __iommu_device_set_domain(struct iommu_group *group,
+ struct device *dev,
+ struct iommu_domain *new_domain,
+ unsigned int flags);
+static int __iommu_group_set_domain_internal(struct iommu_group *group,
+ struct iommu_domain *new_domain,
+ unsigned int flags);
static int __iommu_group_set_domain(struct iommu_group *group,
- struct iommu_domain *new_domain);
-static int iommu_create_device_direct_mappings(struct iommu_group *group,
+ struct iommu_domain *new_domain)
+{
+ return __iommu_group_set_domain_internal(group, new_domain, 0);
+}
+static void __iommu_group_set_domain_nofail(struct iommu_group *group,
+ struct iommu_domain *new_domain)
+{
+ WARN_ON(__iommu_group_set_domain_internal(
+ group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED));
+}
+
+static int iommu_setup_default_domain(struct iommu_group *group,
+ int target_type);
+static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
struct device *dev);
static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
static ssize_t iommu_group_store_type(struct iommu_group *group,
@@ -176,16 +202,16 @@ static int __init iommu_subsys_init(void)
if (!iommu_default_passthrough() && !iommu_dma_strict)
iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ;
- pr_info("Default domain type: %s %s\n",
+ pr_info("Default domain type: %s%s\n",
iommu_domain_type_str(iommu_def_domain_type),
(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
- "(set via kernel command line)" : "");
+ " (set via kernel command line)" : "");
if (!iommu_default_passthrough())
- pr_info("DMA domain TLB invalidation policy: %s mode %s\n",
+ pr_info("DMA domain TLB invalidation policy: %s mode%s\n",
iommu_dma_strict ? "strict" : "lazy",
(iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
- "(set via kernel command line)" : "");
+ " (set via kernel command line)" : "");
nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL);
if (!nb)
@@ -343,6 +369,8 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
dev->iommu->iommu_dev = iommu_dev;
dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
+ if (ops->is_attach_deferred)
+ dev->iommu->attach_deferred = ops->is_attach_deferred(dev);
group = iommu_group_get_for_dev(dev);
if (IS_ERR(group)) {
@@ -377,30 +405,6 @@ err_unlock:
return ret;
}
-static bool iommu_is_attach_deferred(struct device *dev)
-{
- const struct iommu_ops *ops = dev_iommu_ops(dev);
-
- if (ops->is_attach_deferred)
- return ops->is_attach_deferred(dev);
-
- return false;
-}
-
-static int iommu_group_do_dma_first_attach(struct device *dev, void *data)
-{
- struct iommu_domain *domain = data;
-
- lockdep_assert_held(&dev->iommu_group->mutex);
-
- if (iommu_is_attach_deferred(dev)) {
- dev->iommu->attach_deferred = 1;
- return 0;
- }
-
- return __iommu_attach_device(domain, dev);
-}
-
int iommu_probe_device(struct device *dev)
{
const struct iommu_ops *ops;
@@ -417,29 +421,20 @@ int iommu_probe_device(struct device *dev)
goto err_release;
}
- /*
- * Try to allocate a default domain - needs support from the
- * IOMMU driver. There are still some drivers which don't
- * support default domains, so the return value is not yet
- * checked.
- */
mutex_lock(&group->mutex);
- iommu_alloc_default_domain(group, dev);
- /*
- * If device joined an existing group which has been claimed, don't
- * attach the default domain.
- */
- if (group->default_domain && !group->owner) {
- ret = iommu_group_do_dma_first_attach(dev, group->default_domain);
- if (ret) {
- mutex_unlock(&group->mutex);
- iommu_group_put(group);
- goto err_release;
- }
- }
+ if (group->default_domain)
+ iommu_create_device_direct_mappings(group->default_domain, dev);
- iommu_create_device_direct_mappings(group, dev);
+ if (group->domain) {
+ ret = __iommu_device_set_domain(group, dev, group->domain, 0);
+ if (ret)
+ goto err_unlock;
+ } else if (!group->default_domain) {
+ ret = iommu_setup_default_domain(group, 0);
+ if (ret)
+ goto err_unlock;
+ }
mutex_unlock(&group->mutex);
iommu_group_put(group);
@@ -450,6 +445,9 @@ int iommu_probe_device(struct device *dev)
return 0;
+err_unlock:
+ mutex_unlock(&group->mutex);
+ iommu_group_put(group);
err_release:
iommu_release_device(dev);
@@ -468,7 +466,7 @@ __iommu_group_remove_device(struct iommu_group *group, struct device *dev)
struct group_device *device;
lockdep_assert_held(&group->mutex);
- list_for_each_entry(device, &group->devices, list) {
+ for_each_group_device(group, device) {
if (device->dev == dev) {
list_del(&device->list);
return device;
@@ -707,7 +705,7 @@ int iommu_get_group_resv_regions(struct iommu_group *group,
int ret = 0;
mutex_lock(&group->mutex);
- list_for_each_entry(device, &group->devices, list) {
+ for_each_group_device(group, device) {
struct list_head dev_resv_regions;
/*
@@ -953,16 +951,15 @@ int iommu_group_set_name(struct iommu_group *group, const char *name)
}
EXPORT_SYMBOL_GPL(iommu_group_set_name);
-static int iommu_create_device_direct_mappings(struct iommu_group *group,
+static int iommu_create_device_direct_mappings(struct iommu_domain *domain,
struct device *dev)
{
- struct iommu_domain *domain = group->default_domain;
struct iommu_resv_region *entry;
struct list_head mappings;
unsigned long pg_size;
int ret = 0;
- if (!domain || !iommu_is_dma_domain(domain))
+ if (!iommu_is_dma_domain(domain))
return 0;
BUG_ON(!domain->pgsize_bitmap);
@@ -1069,25 +1066,13 @@ rename:
mutex_lock(&group->mutex);
list_add_tail(&device->list, &group->devices);
- if (group->domain)
- ret = iommu_group_do_dma_first_attach(dev, group->domain);
mutex_unlock(&group->mutex);
- if (ret)
- goto err_put_group;
-
trace_add_device_to_group(group->id, dev);
dev_info(dev, "Adding to iommu group %d\n", group->id);
return 0;
-err_put_group:
- mutex_lock(&group->mutex);
- list_del(&device->list);
- mutex_unlock(&group->mutex);
- dev->iommu_group = NULL;
- kobject_put(group->devices_kobj);
- sysfs_remove_link(group->devices_kobj, device->name);
err_free_name:
kfree(device->name);
err_remove_link:
@@ -1125,31 +1110,6 @@ void iommu_group_remove_device(struct device *dev)
}
EXPORT_SYMBOL_GPL(iommu_group_remove_device);
-static int iommu_group_device_count(struct iommu_group *group)
-{
- struct group_device *entry;
- int ret = 0;
-
- list_for_each_entry(entry, &group->devices, list)
- ret++;
-
- return ret;
-}
-
-static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
- int (*fn)(struct device *, void *))
-{
- struct group_device *device;
- int ret = 0;
-
- list_for_each_entry(device, &group->devices, list) {
- ret = fn(device->dev, data);
- if (ret)
- break;
- }
- return ret;
-}
-
/**
* iommu_group_for_each_dev - iterate over each device in the group
* @group: the group
@@ -1164,10 +1124,15 @@ static int __iommu_group_for_each_dev(struct iommu_group *group, void *data,
int iommu_group_for_each_dev(struct iommu_group *group, void *data,
int (*fn)(struct device *, void *))
{
- int ret;
+ struct group_device *device;
+ int ret = 0;
mutex_lock(&group->mutex);
- ret = __iommu_group_for_each_dev(group, data, fn);
+ for_each_group_device(group, device) {
+ ret = fn(device->dev, data);
+ if (ret)
+ break;
+ }
mutex_unlock(&group->mutex);
return ret;
@@ -1656,40 +1621,47 @@ static int iommu_get_def_domain_type(struct device *dev)
return 0;
}
-static int iommu_group_alloc_default_domain(const struct bus_type *bus,
- struct iommu_group *group,
- unsigned int type)
+static struct iommu_domain *
+__iommu_group_alloc_default_domain(const struct bus_type *bus,
+ struct iommu_group *group, int req_type)
{
- struct iommu_domain *dom;
-
- dom = __iommu_domain_alloc(bus, type);
- if (!dom && type != IOMMU_DOMAIN_DMA) {
- dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA);
- if (dom)
- pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
- type, group->name);
- }
-
- if (!dom)
- return -ENOMEM;
-
- group->default_domain = dom;
- if (!group->domain)
- group->domain = dom;
- return 0;
+ if (group->default_domain && group->default_domain->type == req_type)
+ return group->default_domain;
+ return __iommu_domain_alloc(bus, req_type);
}
-static int iommu_alloc_default_domain(struct iommu_group *group,
- struct device *dev)
+/*
+ * req_type of 0 means "auto" which means to select a domain based on
+ * iommu_def_domain_type or what the driver actually supports.
+ */
+static struct iommu_domain *
+iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
{
- unsigned int type;
+ const struct bus_type *bus =
+ list_first_entry(&group->devices, struct group_device, list)
+ ->dev->bus;
+ struct iommu_domain *dom;
- if (group->default_domain)
- return 0;
+ lockdep_assert_held(&group->mutex);
+
+ if (req_type)
+ return __iommu_group_alloc_default_domain(bus, group, req_type);
- type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type;
+ /* The driver gave no guidance on what type to use, try the default */
+ dom = __iommu_group_alloc_default_domain(bus, group, iommu_def_domain_type);
+ if (dom)
+ return dom;
- return iommu_group_alloc_default_domain(dev->bus, group, type);
+ /* Otherwise IDENTITY and DMA_FQ defaults will try DMA */
+ if (iommu_def_domain_type == IOMMU_DOMAIN_DMA)
+ return NULL;
+ dom = __iommu_group_alloc_default_domain(bus, group, IOMMU_DOMAIN_DMA);
+ if (!dom)
+ return NULL;
+
+ pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
+ iommu_def_domain_type, group->name);
+ return dom;
}
/**
@@ -1774,87 +1746,48 @@ static int iommu_bus_notifier(struct notifier_block *nb,
return 0;
}
-struct __group_domain_type {
- struct device *dev;
- unsigned int type;
-};
-
-static int probe_get_default_domain_type(struct device *dev, void *data)
-{
- struct __group_domain_type *gtype = data;
- unsigned int type = iommu_get_def_domain_type(dev);
-
- if (type) {
- if (gtype->type && gtype->type != type) {
- dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
- iommu_domain_type_str(type),
- dev_name(gtype->dev),
- iommu_domain_type_str(gtype->type));
- gtype->type = 0;
- }
-
- if (!gtype->dev) {
- gtype->dev = dev;
- gtype->type = type;
- }
- }
-
- return 0;
-}
-
-static void probe_alloc_default_domain(const struct bus_type *bus,
- struct iommu_group *group)
+/* A target_type of 0 will select the best domain type and cannot fail */
+static int iommu_get_default_domain_type(struct iommu_group *group,
+ int target_type)
{
- struct __group_domain_type gtype;
+ int best_type = target_type;
+ struct group_device *gdev;
+ struct device *last_dev;
- memset(&gtype, 0, sizeof(gtype));
-
- /* Ask for default domain requirements of all devices in the group */
- __iommu_group_for_each_dev(group, &gtype,
- probe_get_default_domain_type);
-
- if (!gtype.type)
- gtype.type = iommu_def_domain_type;
+ lockdep_assert_held(&group->mutex);
- iommu_group_alloc_default_domain(bus, group, gtype.type);
+ for_each_group_device(group, gdev) {
+ unsigned int type = iommu_get_def_domain_type(gdev->dev);
-}
+ if (best_type && type && best_type != type) {
+ if (target_type) {
+ dev_err_ratelimited(
+ gdev->dev,
+ "Device cannot be in %s domain\n",
+ iommu_domain_type_str(target_type));
+ return -1;
+ }
-static int __iommu_group_dma_first_attach(struct iommu_group *group)
-{
- return __iommu_group_for_each_dev(group, group->default_domain,
- iommu_group_do_dma_first_attach);
+ dev_warn(
+ gdev->dev,
+ "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
+ iommu_domain_type_str(type), dev_name(last_dev),
+ iommu_domain_type_str(best_type));
+ return 0;
+ }
+ if (!best_type)
+ best_type = type;
+ last_dev = gdev->dev;
+ }
+ return best_type;
}
-static int iommu_group_do_probe_finalize(struct device *dev, void *data)
+static void iommu_group_do_probe_finalize(struct device *dev)
{
const struct iommu_ops *ops = dev_iommu_ops(dev);
if (ops->probe_finalize)
ops->probe_finalize(dev);
-
- return 0;
-}
-
-static void __iommu_group_dma_finalize(struct iommu_group *group)
-{
- __iommu_group_for_each_dev(group, group->default_domain,
- iommu_group_do_probe_finalize);
-}
-
-static int iommu_do_create_direct_mappings(struct device *dev, void *data)
-{
- struct iommu_group *group = data;
-
- iommu_create_device_direct_mappings(group, dev);
-
- return 0;
-}
-
-static int iommu_group_create_direct_mappings(struct iommu_group *group)
-{
- return __iommu_group_for_each_dev(group, group,
- iommu_do_create_direct_mappings);
}
int bus_iommu_probe(const struct bus_type *bus)
@@ -1873,32 +1806,31 @@ int bus_iommu_probe(const struct bus_type *bus)
return ret;
list_for_each_entry_safe(group, next, &group_list, entry) {
+ struct group_device *gdev;
+
mutex_lock(&group->mutex);
/* Remove item from the list */
list_del_init(&group->entry);
- /* Try to allocate default domain */
- probe_alloc_default_domain(bus, group);
-
- if (!group->default_domain) {
+ ret = iommu_setup_default_domain(group, 0);
+ if (ret) {
mutex_unlock(&group->mutex);
- continue;
+ return ret;
}
-
- iommu_group_create_direct_mappings(group);
-
- ret = __iommu_group_dma_first_attach(group);
-
mutex_unlock(&group->mutex);
- if (ret)
- break;
-
- __iommu_group_dma_finalize(group);
+ /*
+ * FIXME: Mis-locked because the ops->probe_finalize() call-back
+ * of some IOMMU drivers calls arm_iommu_attach_device() which
+ * in-turn might call back into IOMMU core code, where it tries
+ * to take group->mutex, resulting in a deadlock.
+ */
+ for_each_group_device(group, gdev)
+ iommu_group_do_probe_finalize(gdev->dev);
}
- return ret;
+ return 0;
}
bool iommu_present(const struct bus_type *bus)
@@ -1946,7 +1878,7 @@ bool iommu_group_has_isolated_msi(struct iommu_group *group)
bool ret = true;
mutex_lock(&group->mutex);
- list_for_each_entry(group_dev, &group->devices, list)
+ for_each_group_device(group, group_dev)
ret &= msi_device_has_isolated_msi(group_dev->dev);
mutex_unlock(&group->mutex);
return ret;
@@ -1980,11 +1912,12 @@ static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
unsigned type)
{
struct iommu_domain *domain;
+ unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS;
if (bus == NULL || bus->iommu_ops == NULL)
return NULL;
- domain = bus->iommu_ops->domain_alloc(type);
+ domain = bus->iommu_ops->domain_alloc(alloc_type);
if (!domain)
return NULL;
@@ -2028,15 +1961,13 @@ EXPORT_SYMBOL_GPL(iommu_domain_free);
static void __iommu_group_set_core_domain(struct iommu_group *group)
{
struct iommu_domain *new_domain;
- int ret;
if (group->owner)
new_domain = group->blocking_domain;
else
new_domain = group->default_domain;
- ret = __iommu_group_set_domain(group, new_domain);
- WARN(ret, "iommu driver failed to attach the default/blocking domain");
+ __iommu_group_set_domain_nofail(group, new_domain);
}
static int __iommu_attach_device(struct iommu_domain *domain,
@@ -2082,7 +2013,7 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
*/
mutex_lock(&group->mutex);
ret = -EINVAL;
- if (iommu_group_device_count(group) != 1)
+ if (list_count_nodes(&group->devices) != 1)
goto out_unlock;
ret = __iommu_attach_group(domain, group);
@@ -2113,7 +2044,7 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
mutex_lock(&group->mutex);
if (WARN_ON(domain != group->domain) ||
- WARN_ON(iommu_group_device_count(group) != 1))
+ WARN_ON(list_count_nodes(&group->devices) != 1))
goto out_unlock;
__iommu_group_set_core_domain(group);
@@ -2149,52 +2080,14 @@ struct iommu_domain *iommu_get_dma_domain(struct device *dev)
return dev->iommu_group->default_domain;
}
-/*
- * IOMMU groups are really the natural working unit of the IOMMU, but
- * the IOMMU API works on domains and devices. Bridge that gap by
- * iterating over the devices in a group. Ideally we'd have a single
- * device which represents the requestor ID of the group, but we also
- * allow IOMMU drivers to create policy defined minimum sets, where
- * the physical hardware may be able to distiguish members, but we
- * wish to group them at a higher level (ex. untrusted multi-function
- * PCI devices). Thus we attach each device.
- */
-static int iommu_group_do_attach_device(struct device *dev, void *data)
-{
- struct iommu_domain *domain = data;
-
- return __iommu_attach_device(domain, dev);
-}
-
static int __iommu_attach_group(struct iommu_domain *domain,
struct iommu_group *group)
{
- int ret;
-
if (group->domain && group->domain != group->default_domain &&
group->domain != group->blocking_domain)
return -EBUSY;
- ret = __iommu_group_for_each_dev(group, domain,
- iommu_group_do_attach_device);
- if (ret == 0) {
- group->domain = domain;
- } else {
- /*
- * To recover from the case when certain device within the
- * group fails to attach to the new domain, we need force
- * attaching all devices back to the old domain. The old
- * domain is compatible for all devices in the group,
- * hence the iommu driver should always return success.
- */
- struct iommu_domain *old_domain = group->domain;
-
- group->domain = NULL;
- WARN(__iommu_group_set_domain(group, old_domain),
- "iommu driver failed to attach a compatible domain");
- }
-
- return ret;
+ return __iommu_group_set_domain(group, domain);
}
/**
@@ -2221,21 +2114,61 @@ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
}
EXPORT_SYMBOL_GPL(iommu_attach_group);
-static int iommu_group_do_set_platform_dma(struct device *dev, void *data)
+static int __iommu_device_set_domain(struct iommu_group *group,
+ struct device *dev,
+ struct iommu_domain *new_domain,
+ unsigned int flags)
{
- const struct iommu_ops *ops = dev_iommu_ops(dev);
+ int ret;
- if (!WARN_ON(!ops->set_platform_dma_ops))
- ops->set_platform_dma_ops(dev);
+ if (dev->iommu->attach_deferred) {
+ if (new_domain == group->default_domain)
+ return 0;
+ dev->iommu->attach_deferred = 0;
+ }
+ ret = __iommu_attach_device(new_domain, dev);
+ if (ret) {
+ /*
+ * If we have a blocking domain then try to attach that in hopes
+ * of avoiding a UAF. Modern drivers should implement blocking
+ * domains as global statics that cannot fail.
+ */
+ if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) &&
+ group->blocking_domain &&
+ group->blocking_domain != new_domain)
+ __iommu_attach_device(group->blocking_domain, dev);
+ return ret;
+ }
return 0;
}
-static int __iommu_group_set_domain(struct iommu_group *group,
- struct iommu_domain *new_domain)
+/*
+ * If 0 is returned the group's domain is new_domain. If an error is returned
+ * then the group's domain will be set back to the existing domain unless
+ * IOMMU_SET_DOMAIN_MUST_SUCCEED, otherwise an error is returned and the group's
+ * domains is left inconsistent. This is a driver bug to fail attach with a
+ * previously good domain. We try to avoid a kernel UAF because of this.
+ *
+ * IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU
+ * API works on domains and devices. Bridge that gap by iterating over the
+ * devices in a group. Ideally we'd have a single device which represents the
+ * requestor ID of the group, but we also allow IOMMU drivers to create policy
+ * defined minimum sets, where the physical hardware may be able to distiguish
+ * members, but we wish to group them at a higher level (ex. untrusted
+ * multi-function PCI devices). Thus we attach each device.
+ */
+static int __iommu_group_set_domain_internal(struct iommu_group *group,
+ struct iommu_domain *new_domain,
+ unsigned int flags)
{
+ struct group_device *last_gdev;
+ struct group_device *gdev;
+ int result;
int ret;
+ lockdep_assert_held(&group->mutex);
+
if (group->domain == new_domain)
return 0;
@@ -2245,8 +2178,12 @@ static int __iommu_group_set_domain(struct iommu_group *group,
* platform specific behavior.
*/
if (!new_domain) {
- __iommu_group_for_each_dev(group, NULL,
- iommu_group_do_set_platform_dma);
+ for_each_group_device(group, gdev) {
+ const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);
+
+ if (!WARN_ON(!ops->set_platform_dma_ops))
+ ops->set_platform_dma_ops(gdev->dev);
+ }
group->domain = NULL;
return 0;
}
@@ -2256,16 +2193,52 @@ static int __iommu_group_set_domain(struct iommu_group *group,
* domain. This switch does not have to be atomic and DMA can be
* discarded during the transition. DMA must only be able to access
* either new_domain or group->domain, never something else.
- *
- * Note that this is called in error unwind paths, attaching to a
- * domain that has already been attached cannot fail.
*/
- ret = __iommu_group_for_each_dev(group, new_domain,
- iommu_group_do_attach_device);
- if (ret)
- return ret;
+ result = 0;
+ for_each_group_device(group, gdev) {
+ ret = __iommu_device_set_domain(group, gdev->dev, new_domain,
+ flags);
+ if (ret) {
+ result = ret;
+ /*
+ * Keep trying the other devices in the group. If a
+ * driver fails attach to an otherwise good domain, and
+ * does not support blocking domains, it should at least
+ * drop its reference on the current domain so we don't
+ * UAF.
+ */
+ if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED)
+ continue;
+ goto err_revert;
+ }
+ }
group->domain = new_domain;
- return 0;
+ return result;
+
+err_revert:
+ /*
+ * This is called in error unwind paths. A well behaved driver should
+ * always allow us to attach to a domain that was already attached.
+ */
+ last_gdev = gdev;
+ for_each_group_device(group, gdev) {
+ const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);
+
+ /*
+ * If set_platform_dma_ops is not present a NULL domain can
+ * happen only for first probe, in which case we leave
+ * group->domain as NULL and let release clean everything up.
+ */
+ if (group->domain)
+ WARN_ON(__iommu_device_set_domain(
+ group, gdev->dev, group->domain,
+ IOMMU_SET_DOMAIN_MUST_SUCCEED));
+ else if (ops->set_platform_dma_ops)
+ ops->set_platform_dma_ops(gdev->dev);
+ if (gdev == last_gdev)
+ break;
+ }
+ return ret;
}
void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
@@ -2846,78 +2819,112 @@ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
}
EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
-/*
- * Changes the default domain of an iommu group
- *
- * @group: The group for which the default domain should be changed
- * @dev: The first device in the group
- * @type: The type of the new default domain that gets associated with the group
- *
- * Returns 0 on success and error code on failure
+/**
+ * iommu_setup_default_domain - Set the default_domain for the group
+ * @group: Group to change
+ * @target_type: Domain type to set as the default_domain
*
- * Note:
- * 1. Presently, this function is called only when user requests to change the
- * group's default domain type through /sys/kernel/iommu_groups/<grp_id>/type
- * Please take a closer look if intended to use for other purposes.
+ * Allocate a default domain and set it as the current domain on the group. If
+ * the group already has a default domain it will be changed to the target_type.
+ * When target_type is 0 the default domain is selected based on driver and
+ * system preferences.
*/
-static int iommu_change_dev_def_domain(struct iommu_group *group,
- struct device *dev, int type)
+static int iommu_setup_default_domain(struct iommu_group *group,
+ int target_type)
{
- struct __group_domain_type gtype = {NULL, 0};
- struct iommu_domain *prev_dom;
+ struct iommu_domain *old_dom = group->default_domain;
+ struct group_device *gdev;
+ struct iommu_domain *dom;
+ bool direct_failed;
+ int req_type;
int ret;
lockdep_assert_held(&group->mutex);
- prev_dom = group->default_domain;
- __iommu_group_for_each_dev(group, &gtype,
- probe_get_default_domain_type);
- if (!type) {
- /*
- * If the user hasn't requested any specific type of domain and
- * if the device supports both the domains, then default to the
- * domain the device was booted with
- */
- type = gtype.type ? : iommu_def_domain_type;
- } else if (gtype.type && type != gtype.type) {
- dev_err_ratelimited(dev, "Device cannot be in %s domain\n",
- iommu_domain_type_str(type));
+ req_type = iommu_get_default_domain_type(group, target_type);
+ if (req_type < 0)
return -EINVAL;
- }
/*
- * Switch to a new domain only if the requested domain type is different
- * from the existing default domain type
+ * There are still some drivers which don't support default domains, so
+ * we ignore the failure and leave group->default_domain NULL.
+ *
+ * We assume that the iommu driver starts up the device in
+ * 'set_platform_dma_ops' mode if it does not support default domains.
*/
- if (prev_dom->type == type)
+ dom = iommu_group_alloc_default_domain(group, req_type);
+ if (!dom) {
+ /* Once in default_domain mode we never leave */
+ if (group->default_domain)
+ return -ENODEV;
+ group->default_domain = NULL;
return 0;
+ }
- group->default_domain = NULL;
- group->domain = NULL;
-
- /* Sets group->default_domain to the newly allocated domain */
- ret = iommu_group_alloc_default_domain(dev->bus, group, type);
- if (ret)
- goto restore_old_domain;
-
- ret = iommu_group_create_direct_mappings(group);
- if (ret)
- goto free_new_domain;
-
- ret = __iommu_attach_group(group->default_domain, group);
- if (ret)
- goto free_new_domain;
+ if (group->default_domain == dom)
+ return 0;
- iommu_domain_free(prev_dom);
+ /*
+ * IOMMU_RESV_DIRECT and IOMMU_RESV_DIRECT_RELAXABLE regions must be
+ * mapped before their device is attached, in order to guarantee
+ * continuity with any FW activity
+ */
+ direct_failed = false;
+ for_each_group_device(group, gdev) {
+ if (iommu_create_device_direct_mappings(dom, gdev->dev)) {
+ direct_failed = true;
+ dev_warn_once(
+ gdev->dev->iommu->iommu_dev->dev,
+ "IOMMU driver was not able to establish FW requested direct mapping.");
+ }
+ }
- return 0;
+ /* We must set default_domain early for __iommu_device_set_domain */
+ group->default_domain = dom;
+ if (!group->domain) {
+ /*
+ * Drivers are not allowed to fail the first domain attach.
+ * The only way to recover from this is to fail attaching the
+ * iommu driver and call ops->release_device. Put the domain
+ * in group->default_domain so it is freed after.
+ */
+ ret = __iommu_group_set_domain_internal(
+ group, dom, IOMMU_SET_DOMAIN_MUST_SUCCEED);
+ if (WARN_ON(ret))
+ goto out_free;
+ } else {
+ ret = __iommu_group_set_domain(group, dom);
+ if (ret) {
+ iommu_domain_free(dom);
+ group->default_domain = old_dom;
+ return ret;
+ }
+ }
-free_new_domain:
- iommu_domain_free(group->default_domain);
-restore_old_domain:
- group->default_domain = prev_dom;
- group->domain = prev_dom;
+ /*
+ * Drivers are supposed to allow mappings to be installed in a domain
+ * before device attachment, but some don't. Hack around this defect by
+ * trying again after attaching. If this happens it means the device
+ * will not continuously have the IOMMU_RESV_DIRECT map.
+ */
+ if (direct_failed) {
+ for_each_group_device(group, gdev) {
+ ret = iommu_create_device_direct_mappings(dom, gdev->dev);
+ if (ret)
+ goto err_restore;
+ }
+ }
+err_restore:
+ if (old_dom) {
+ __iommu_group_set_domain_internal(
+ group, old_dom, IOMMU_SET_DOMAIN_MUST_SUCCEED);
+ iommu_domain_free(dom);
+ old_dom = NULL;
+ }
+out_free:
+ if (old_dom)
+ iommu_domain_free(old_dom);
return ret;
}
@@ -2933,8 +2940,7 @@ restore_old_domain:
static ssize_t iommu_group_store_type(struct iommu_group *group,
const char *buf, size_t count)
{
- struct group_device *grp_dev;
- struct device *dev;
+ struct group_device *gdev;
int ret, req_type;
if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
@@ -2959,23 +2965,23 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
if (req_type == IOMMU_DOMAIN_DMA_FQ &&
group->default_domain->type == IOMMU_DOMAIN_DMA) {
ret = iommu_dma_init_fq(group->default_domain);
- if (!ret)
- group->default_domain->type = IOMMU_DOMAIN_DMA_FQ;
- mutex_unlock(&group->mutex);
+ if (ret)
+ goto out_unlock;
- return ret ?: count;
+ group->default_domain->type = IOMMU_DOMAIN_DMA_FQ;
+ ret = count;
+ goto out_unlock;
}
/* Otherwise, ensure that device exists and no driver is bound. */
if (list_empty(&group->devices) || group->owner_cnt) {
- mutex_unlock(&group->mutex);
- return -EPERM;
+ ret = -EPERM;
+ goto out_unlock;
}
- grp_dev = list_first_entry(&group->devices, struct group_device, list);
- dev = grp_dev->dev;
-
- ret = iommu_change_dev_def_domain(group, dev, req_type);
+ ret = iommu_setup_default_domain(group, req_type);
+ if (ret)
+ goto out_unlock;
/*
* Release the mutex here because ops->probe_finalize() call-back of
@@ -2986,9 +2992,12 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
mutex_unlock(&group->mutex);
/* Make sure dma_ops is appropriatley set */
- if (!ret)
- __iommu_group_dma_finalize(group);
+ for_each_group_device(group, gdev)
+ iommu_group_do_probe_finalize(gdev->dev);
+ return count;
+out_unlock:
+ mutex_unlock(&group->mutex);
return ret ?: count;
}
@@ -3182,16 +3191,13 @@ EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner);
static void __iommu_release_dma_ownership(struct iommu_group *group)
{
- int ret;
-
if (WARN_ON(!group->owner_cnt || !group->owner ||
!xa_empty(&group->pasid_array)))
return;
group->owner_cnt = 0;
group->owner = NULL;
- ret = __iommu_group_set_domain(group, group->default_domain);
- WARN(ret, "iommu driver failed to attach the default domain");
+ __iommu_group_set_domain_nofail(group, group->default_domain);
}
/**
@@ -3253,7 +3259,7 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain,
struct group_device *device;
int ret = 0;
- list_for_each_entry(device, &group->devices, list) {
+ for_each_group_device(group, device) {
ret = domain->ops->set_dev_pasid(domain, device->dev, pasid);
if (ret)
break;
@@ -3268,7 +3274,7 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
struct group_device *device;
const struct iommu_ops *ops;
- list_for_each_entry(device, &group->devices, list) {
+ for_each_group_device(group, device) {
ops = dev_iommu_ops(device->dev);
ops->remove_dev_pasid(device->dev, pasid);
}
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index fe452ce46642..10b964600948 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -647,7 +647,13 @@ struct iova_rcache {
static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
{
- return kzalloc(sizeof(struct iova_magazine), flags);
+ struct iova_magazine *mag;
+
+ mag = kmalloc(sizeof(*mag), flags);
+ if (mag)
+ mag->size = 0;
+
+ return mag;
}
static void iova_magazine_free(struct iova_magazine *mag)
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 5b8fe9bfa9a5..3551ed057774 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -788,6 +788,29 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
return 0;
}
+static void viommu_detach_dev(struct viommu_endpoint *vdev)
+{
+ int i;
+ struct virtio_iommu_req_detach req;
+ struct viommu_domain *vdomain = vdev->vdomain;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(vdev->dev);
+
+ if (!vdomain)
+ return;
+
+ req = (struct virtio_iommu_req_detach) {
+ .head.type = VIRTIO_IOMMU_T_DETACH,
+ .domain = cpu_to_le32(vdomain->id),
+ };
+
+ for (i = 0; i < fwspec->num_ids; i++) {
+ req.endpoint = cpu_to_le32(fwspec->ids[i]);
+ WARN_ON(viommu_send_req_sync(vdev->viommu, &req, sizeof(req)));
+ }
+ vdomain->nr_endpoints--;
+ vdev->vdomain = NULL;
+}
+
static int viommu_map_pages(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
@@ -810,25 +833,26 @@ static int viommu_map_pages(struct iommu_domain *domain, unsigned long iova,
if (ret)
return ret;
- map = (struct virtio_iommu_req_map) {
- .head.type = VIRTIO_IOMMU_T_MAP,
- .domain = cpu_to_le32(vdomain->id),
- .virt_start = cpu_to_le64(iova),
- .phys_start = cpu_to_le64(paddr),
- .virt_end = cpu_to_le64(end),
- .flags = cpu_to_le32(flags),
- };
-
- if (!vdomain->nr_endpoints)
- return 0;
+ if (vdomain->nr_endpoints) {
+ map = (struct virtio_iommu_req_map) {
+ .head.type = VIRTIO_IOMMU_T_MAP,
+ .domain = cpu_to_le32(vdomain->id),
+ .virt_start = cpu_to_le64(iova),
+ .phys_start = cpu_to_le64(paddr),
+ .virt_end = cpu_to_le64(end),
+ .flags = cpu_to_le32(flags),
+ };
- ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map));
- if (ret)
- viommu_del_mappings(vdomain, iova, end);
- else if (mapped)
+ ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map));
+ if (ret) {
+ viommu_del_mappings(vdomain, iova, end);
+ return ret;
+ }
+ }
+ if (mapped)
*mapped = size;
- return ret;
+ return 0;
}
static size_t viommu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
@@ -990,6 +1014,7 @@ static void viommu_release_device(struct device *dev)
{
struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
+ viommu_detach_dev(vdev);
iommu_put_resv_regions(dev, &vdev->resv_regions);
kfree(vdev);
}
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index e8c9a7da1060..d31642596675 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -65,6 +65,7 @@ struct iommu_domain_geometry {
#define __IOMMU_DOMAIN_SVA (1U << 4) /* Shared process address space */
+#define IOMMU_DOMAIN_ALLOC_FLAGS ~__IOMMU_DOMAIN_DMA_FQ
/*
* This are the possible domain-types
*
@@ -127,6 +128,11 @@ enum iommu_cap {
* this device.
*/
IOMMU_CAP_ENFORCE_CACHE_COHERENCY,
+ /*
+ * IOMMU driver does not issue TLB maintenance during .unmap, so can
+ * usefully support the non-strict DMA flush queue.
+ */
+ IOMMU_CAP_DEFERRED_FLUSH,
};
/* These are the possible reserved region types */