diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-29 20:51:03 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-29 20:51:03 -0700 |
commit | d35ac6ac0e80e55bcea79af18d935f19a3e8554c (patch) | |
tree | 84281d7724f5e67c2c0a599dd9ffdeccb96bce9f | |
parent | 0b26eadbf200abf6c97c6d870286c73219cdac65 (diff) | |
parent | a7a334076dd725b8c3b5d64f68e3992ffcfd1d25 (diff) | |
download | lwn-d35ac6ac0e80e55bcea79af18d935f19a3e8554c.tar.gz lwn-d35ac6ac0e80e55bcea79af18d935f19a3e8554c.zip |
Merge tag 'iommu-updates-v6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull iommu updates from Joerg Roedel:
"Core changes:
- iova_magazine_alloc() optimization
- Make flush-queue an IOMMU driver capability
- Consolidate the error handling around device attachment
AMD IOMMU changes:
- AVIC Interrupt Remapping Improvements
- Some minor fixes and cleanups
Intel VT-d changes from Lu Baolu:
- Small and misc cleanups
ARM-SMMU changes from Will Deacon:
- Device-tree binding updates:
- Add missing clocks for SC8280XP and SA8775 Adreno SMMUs
- Add two new Qualcomm SMMUs in SDX75 and SM6375
- Workarounds for Arm MMU-700 errata:
- 1076982: Avoid use of SEV-based cmdq wakeup
- 2812531: Terminate command batches with a CMD_SYNC
- Enforce single-stage translation to avoid nesting-related errata
- Set the correct level hint for range TLB invalidation on teardown
.. and some other minor fixes and cleanups (including Freescale PAMU
and virtio-iommu changes)"
* tag 'iommu-updates-v6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (50 commits)
iommu/vt-d: Remove commented-out code
iommu/vt-d: Remove two WARN_ON in domain_context_mapping_one()
iommu/vt-d: Handle the failure case of dmar_reenable_qi()
iommu/vt-d: Remove unnecessary (void*) conversions
iommu/amd: Remove extern from function prototypes
iommu/amd: Use BIT/BIT_ULL macro to define bit fields
iommu/amd: Fix DTE_IRQ_PHYS_ADDR_MASK macro
iommu/amd: Fix compile error for unused function
iommu/amd: Improving Interrupt Remapping Table Invalidation
iommu/amd: Do not Invalidate IRT when IRTE caching is disabled
iommu/amd: Introduce Disable IRTE Caching Support
iommu/amd: Remove the unused struct amd_ir_data.ref
iommu/amd: Switch amd_iommu_update_ga() to use modify_irte_ga()
iommu/arm-smmu-v3: Set TTL invalidation hint better
iommu/arm-smmu-v3: Document nesting-related errata
iommu/arm-smmu-v3: Add explicit feature for nesting
iommu/arm-smmu-v3: Document MMU-700 erratum 2812531
iommu/arm-smmu-v3: Work around MMU-600 erratum 1076982
dt-bindings: arm-smmu: Add SDX75 SMMU compatible
dt-bindings: arm-smmu: Add SM6375 GPU SMMU
...
-rw-r--r-- | .clang-format | 1 | ||||
-rw-r--r-- | Documentation/admin-guide/kernel-parameters.txt | 1 | ||||
-rw-r--r-- | Documentation/arch/arm64/silicon-errata.rst | 4 | ||||
-rw-r--r-- | Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 12 | ||||
-rw-r--r-- | arch/powerpc/sysdev/fsl_pci.c | 1 | ||||
-rw-r--r-- | drivers/iommu/amd/amd_iommu.h | 91 | ||||
-rw-r--r-- | drivers/iommu/amd/amd_iommu_types.h | 87 | ||||
-rw-r--r-- | drivers/iommu/amd/init.c | 38 | ||||
-rw-r--r-- | drivers/iommu/amd/io_pgtable.c | 4 | ||||
-rw-r--r-- | drivers/iommu/amd/io_pgtable_v2.c | 2 | ||||
-rw-r--r-- | drivers/iommu/amd/iommu.c | 99 | ||||
-rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 61 | ||||
-rw-r--r-- | drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 8 | ||||
-rw-r--r-- | drivers/iommu/arm/arm-smmu/arm-smmu.c | 4 | ||||
-rw-r--r-- | drivers/iommu/dma-iommu.c | 3 | ||||
-rw-r--r-- | drivers/iommu/fsl_pamu_domain.c | 113 | ||||
-rw-r--r-- | drivers/iommu/intel/iommu.c | 29 | ||||
-rw-r--r-- | drivers/iommu/iommu.c | 694 | ||||
-rw-r--r-- | drivers/iommu/iova.c | 8 | ||||
-rw-r--r-- | drivers/iommu/virtio-iommu.c | 57 | ||||
-rw-r--r-- | include/linux/iommu.h | 6 |
21 files changed, 715 insertions, 608 deletions
diff --git a/.clang-format b/.clang-format index 0d1ed8776733..0bbb1991defe 100644 --- a/.clang-format +++ b/.clang-format @@ -254,6 +254,7 @@ ForEachMacros: - 'for_each_free_mem_range' - 'for_each_free_mem_range_reverse' - 'for_each_func_rsrc' + - 'for_each_group_device' - 'for_each_group_evsel' - 'for_each_group_member' - 'for_each_hstate' diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b0dd367dfb59..44bcaf791ce6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -323,6 +323,7 @@ option with care. pgtbl_v1 - Use v1 page table for DMA-API (Default). pgtbl_v2 - Use v2 page table for DMA-API. + irtcachedis - Disable Interrupt Remapping Table (IRT) caching. amd_iommu_dump= [HW,X86-64] Enable AMD IOMMU driver option to dump the ACPI table diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index d6430ade349d..f093a9d8bc5c 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -140,6 +140,10 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-500 | #841119,826419 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | MMU-600 | #1076982,1209401| N/A | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | MMU-700 | #2268618,2812531| N/A | ++----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index ba677d401e24..3a31a979709b 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -29,6 +29,7 @@ properties: - qcom,msm8996-smmu-v2 - qcom,msm8998-smmu-v2 - qcom,sdm630-smmu-v2 + - qcom,sm6375-smmu-v2 - const: qcom,smmu-v2 - description: Qcom SoCs implementing "qcom,smmu-500" and "arm,mmu-500" @@ -45,6 +46,7 @@ properties: - qcom,sdm845-smmu-500 - qcom,sdx55-smmu-500 - qcom,sdx65-smmu-500 + - qcom,sdx75-smmu-500 - qcom,sm6115-smmu-500 - qcom,sm6125-smmu-500 - qcom,sm6350-smmu-500 @@ -79,7 +81,9 @@ properties: - description: Qcom Adreno GPUs implementing "qcom,smmu-500" and "arm,mmu-500" items: - enum: + - qcom,sa8775p-smmu-500 - qcom,sc7280-smmu-500 + - qcom,sc8280xp-smmu-500 - qcom,sm6115-smmu-500 - qcom,sm6125-smmu-500 - qcom,sm8150-smmu-500 @@ -267,6 +271,7 @@ allOf: enum: - qcom,msm8998-smmu-v2 - qcom,sdm630-smmu-v2 + - qcom,sm6375-smmu-v2 then: anyOf: - properties: @@ -331,7 +336,10 @@ allOf: properties: compatible: contains: - const: qcom,sc7280-smmu-500 + enum: + - qcom,sa8775p-smmu-500 + - qcom,sc7280-smmu-500 + - qcom,sc8280xp-smmu-500 then: properties: clock-names: @@ -413,10 +421,8 @@ allOf: - nvidia,smmu-500 - qcom,qcm2290-smmu-500 - qcom,qdu1000-smmu-500 - - qcom,sa8775p-smmu-500 - qcom,sc7180-smmu-500 - qcom,sc8180x-smmu-500 - - qcom,sc8280xp-smmu-500 - qcom,sdm670-smmu-500 - qcom,sdm845-smmu-500 - qcom,sdx55-smmu-500 diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index b7232c46b244..6daf620b63a4 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1353,6 +1353,7 @@ static struct platform_driver fsl_pci_driver = { .of_match_table = pci_ids, }, .probe = fsl_pci_probe, + .driver_managed_dma = true, }; static int __init fsl_pci_init(void) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 9beeceb9d825..0c35018239ce 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -11,12 +11,15 @@ #include "amd_iommu_types.h" -extern irqreturn_t amd_iommu_int_thread(int irq, void *data); -extern irqreturn_t amd_iommu_int_handler(int irq, void *data); -extern void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid); -extern void amd_iommu_restart_event_logging(struct amd_iommu *iommu); -extern void amd_iommu_restart_ga_log(struct amd_iommu *iommu); -extern void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid); +irqreturn_t amd_iommu_int_thread(int irq, void *data); +irqreturn_t amd_iommu_int_handler(int irq, void *data); +void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid); +void amd_iommu_restart_event_logging(struct amd_iommu *iommu); +void amd_iommu_restart_ga_log(struct amd_iommu *iommu); +int amd_iommu_init_devices(void); +void amd_iommu_uninit_devices(void); +void amd_iommu_init_notifier(void); +void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid); #ifdef CONFIG_AMD_IOMMU_DEBUGFS void amd_iommu_debugfs_setup(struct amd_iommu *iommu); @@ -25,11 +28,11 @@ static inline void amd_iommu_debugfs_setup(struct amd_iommu *iommu) {} #endif /* Needed for interrupt remapping */ -extern int amd_iommu_prepare(void); -extern int amd_iommu_enable(void); -extern void amd_iommu_disable(void); -extern int amd_iommu_reenable(int); -extern int amd_iommu_enable_faulting(void); +int amd_iommu_prepare(void); +int amd_iommu_enable(void); +void amd_iommu_disable(void); +int amd_iommu_reenable(int mode); +int amd_iommu_enable_faulting(void); extern int amd_iommu_guest_ir; extern enum io_pgtable_fmt amd_iommu_pgtable; extern int amd_iommu_gpt_level; @@ -37,33 +40,32 @@ extern int amd_iommu_gpt_level; /* IOMMUv2 specific functions */ struct iommu_domain; -extern bool amd_iommu_v2_supported(void); -extern struct amd_iommu *get_amd_iommu(unsigned int idx); -extern u8 amd_iommu_pc_get_max_banks(unsigned int idx); -extern bool amd_iommu_pc_supported(void); -extern u8 amd_iommu_pc_get_max_counters(unsigned int idx); -extern int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, - u8 fxn, u64 *value); -extern int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, - u8 fxn, u64 *value); - -extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb); -extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb); -extern void amd_iommu_domain_direct_map(struct iommu_domain *dom); -extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); -extern int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, - u64 address); -extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); -extern void amd_iommu_domain_update(struct protection_domain *domain); -extern void amd_iommu_domain_flush_complete(struct protection_domain *domain); -extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain); -extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid); -extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, - unsigned long cr3); -extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid); +bool amd_iommu_v2_supported(void); +struct amd_iommu *get_amd_iommu(unsigned int idx); +u8 amd_iommu_pc_get_max_banks(unsigned int idx); +bool amd_iommu_pc_supported(void); +u8 amd_iommu_pc_get_max_counters(unsigned int idx); +int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, + u8 fxn, u64 *value); +int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, + u8 fxn, u64 *value); + +int amd_iommu_register_ppr_notifier(struct notifier_block *nb); +int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb); +void amd_iommu_domain_direct_map(struct iommu_domain *dom); +int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); +int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address); +void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); +void amd_iommu_domain_update(struct protection_domain *domain); +void amd_iommu_domain_flush_complete(struct protection_domain *domain); +void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain); +int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid); +int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, + unsigned long cr3); +int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid); #ifdef CONFIG_IRQ_REMAP -extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu); +int amd_iommu_create_irq_domain(struct amd_iommu *iommu); #else static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu) { @@ -75,8 +77,8 @@ static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu) #define PPR_INVALID 0x1 #define PPR_FAILURE 0xf -extern int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, - int status, int tag); +int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, + int status, int tag); static inline bool is_rd890_iommu(struct pci_dev *pdev) { @@ -129,10 +131,9 @@ static inline void *alloc_pgtable_page(int nid, gfp_t gfp) return page ? page_address(page) : NULL; } -extern bool translation_pre_enabled(struct amd_iommu *iommu); -extern bool amd_iommu_is_attach_deferred(struct device *dev); -extern int __init add_special_device(u8 type, u8 id, u32 *devid, - bool cmd_line); +bool translation_pre_enabled(struct amd_iommu *iommu); +bool amd_iommu_is_attach_deferred(struct device *dev); +int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line); #ifdef CONFIG_DMI void amd_iommu_apply_ivrs_quirks(void); @@ -140,9 +141,9 @@ void amd_iommu_apply_ivrs_quirks(void); static inline void amd_iommu_apply_ivrs_quirks(void) { } #endif -extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, - u64 *root, int mode); -extern struct dev_table_entry *get_dev_table(struct amd_iommu *iommu); +void amd_iommu_domain_set_pgtable(struct protection_domain *domain, + u64 *root, int mode); +struct dev_table_entry *get_dev_table(struct amd_iommu *iommu); extern u64 amd_iommu_efr; extern u64 amd_iommu_efr2; diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index ab8aa8f77cc4..dc1db6167927 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -84,21 +84,21 @@ /* Extended Feature Bits */ -#define FEATURE_PREFETCH (1ULL<<0) -#define FEATURE_PPR (1ULL<<1) -#define FEATURE_X2APIC (1ULL<<2) -#define FEATURE_NX (1ULL<<3) -#define FEATURE_GT (1ULL<<4) -#define FEATURE_IA (1ULL<<6) -#define FEATURE_GA (1ULL<<7) -#define FEATURE_HE (1ULL<<8) -#define FEATURE_PC (1ULL<<9) +#define FEATURE_PREFETCH BIT_ULL(0) +#define FEATURE_PPR BIT_ULL(1) +#define FEATURE_X2APIC BIT_ULL(2) +#define FEATURE_NX BIT_ULL(3) +#define FEATURE_GT BIT_ULL(4) +#define FEATURE_IA BIT_ULL(6) +#define FEATURE_GA BIT_ULL(7) +#define FEATURE_HE BIT_ULL(8) +#define FEATURE_PC BIT_ULL(9) #define FEATURE_GATS_SHIFT (12) #define FEATURE_GATS_MASK (3ULL) -#define FEATURE_GAM_VAPIC (1ULL<<21) -#define FEATURE_GIOSUP (1ULL<<48) -#define FEATURE_EPHSUP (1ULL<<50) -#define FEATURE_SNP (1ULL<<63) +#define FEATURE_GAM_VAPIC BIT_ULL(21) +#define FEATURE_GIOSUP BIT_ULL(48) +#define FEATURE_EPHSUP BIT_ULL(50) +#define FEATURE_SNP BIT_ULL(63) #define FEATURE_PASID_SHIFT 32 #define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT) @@ -120,13 +120,13 @@ #define PASID_MASK 0x0000ffff /* MMIO status bits */ -#define MMIO_STATUS_EVT_OVERFLOW_INT_MASK (1 << 0) -#define MMIO_STATUS_EVT_INT_MASK (1 << 1) -#define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2) -#define MMIO_STATUS_PPR_INT_MASK (1 << 6) -#define MMIO_STATUS_GALOG_RUN_MASK (1 << 8) -#define MMIO_STATUS_GALOG_OVERFLOW_MASK (1 << 9) -#define MMIO_STATUS_GALOG_INT_MASK (1 << 10) +#define MMIO_STATUS_EVT_OVERFLOW_INT_MASK BIT(0) +#define MMIO_STATUS_EVT_INT_MASK BIT(1) +#define MMIO_STATUS_COM_WAIT_INT_MASK BIT(2) +#define MMIO_STATUS_PPR_INT_MASK BIT(6) +#define MMIO_STATUS_GALOG_RUN_MASK BIT(8) +#define MMIO_STATUS_GALOG_OVERFLOW_MASK BIT(9) +#define MMIO_STATUS_GALOG_INT_MASK BIT(10) /* event logging constants */ #define EVENT_ENTRY_SIZE 0x10 @@ -174,6 +174,7 @@ #define CONTROL_GAINT_EN 29 #define CONTROL_XT_EN 50 #define CONTROL_INTCAPXT_EN 51 +#define CONTROL_IRTCACHEDIS 59 #define CONTROL_SNPAVIC_EN 61 #define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT) @@ -283,7 +284,7 @@ #define AMD_IOMMU_PGSIZES_V2 (PAGE_SIZE | (1ULL << 21) | (1ULL << 30)) /* Bit value definition for dte irq remapping fields*/ -#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) +#define DTE_IRQ_PHYS_ADDR_MASK GENMASK_ULL(51, 6) #define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60) #define DTE_IRQ_REMAP_INTCTL (2ULL << 60) #define DTE_IRQ_REMAP_ENABLE 1ULL @@ -369,23 +370,23 @@ /* * Bit value definition for I/O PTE fields */ -#define IOMMU_PTE_PR (1ULL << 0) -#define IOMMU_PTE_U (1ULL << 59) -#define IOMMU_PTE_FC (1ULL << 60) -#define IOMMU_PTE_IR (1ULL << 61) -#define IOMMU_PTE_IW (1ULL << 62) +#define IOMMU_PTE_PR BIT_ULL(0) +#define IOMMU_PTE_U BIT_ULL(59) +#define IOMMU_PTE_FC BIT_ULL(60) +#define IOMMU_PTE_IR BIT_ULL(61) +#define IOMMU_PTE_IW BIT_ULL(62) /* * Bit value definition for DTE fields */ -#define DTE_FLAG_V (1ULL << 0) -#define DTE_FLAG_TV (1ULL << 1) -#define DTE_FLAG_IR (1ULL << 61) -#define DTE_FLAG_IW (1ULL << 62) - -#define DTE_FLAG_IOTLB (1ULL << 32) -#define DTE_FLAG_GIOV (1ULL << 54) -#define DTE_FLAG_GV (1ULL << 55) +#define DTE_FLAG_V BIT_ULL(0) +#define DTE_FLAG_TV BIT_ULL(1) +#define DTE_FLAG_IR BIT_ULL(61) +#define DTE_FLAG_IW BIT_ULL(62) + +#define DTE_FLAG_IOTLB BIT_ULL(32) +#define DTE_FLAG_GIOV BIT_ULL(54) +#define DTE_FLAG_GV BIT_ULL(55) #define DTE_FLAG_MASK (0x3ffULL << 32) #define DTE_GLX_SHIFT (56) #define DTE_GLX_MASK (3) @@ -439,13 +440,13 @@ #define MAX_DOMAIN_ID 65536 /* Protection domain flags */ -#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ -#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops +#define PD_DMA_OPS_MASK BIT(0) /* domain used for dma_ops */ +#define PD_DEFAULT_MASK BIT(1) /* domain is a default dma_ops domain for an IOMMU */ -#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page +#define PD_PASSTHROUGH_MASK BIT(2) /* domain has no page translation */ -#define PD_IOMMUV2_MASK (1UL << 3) /* domain has gcr3 table */ -#define PD_GIOV_MASK (1UL << 4) /* domain enable GIOV support */ +#define PD_IOMMUV2_MASK BIT(3) /* domain has gcr3 table */ +#define PD_GIOV_MASK BIT(4) /* domain enable GIOV support */ extern bool amd_iommu_dump; #define DUMP_printk(format, arg...) \ @@ -716,6 +717,9 @@ struct amd_iommu { /* if one, we need to send a completion wait command */ bool need_sync; + /* true if disable irte caching */ + bool irtcachedis_enabled; + /* Handle for IOMMU core code */ struct iommu_device iommu; @@ -748,7 +752,7 @@ struct amd_iommu { u32 flags; volatile u64 *cmd_sem; - u64 cmd_sem_val; + atomic64_t cmd_sem_val; #ifdef CONFIG_AMD_IOMMU_DEBUGFS /* DebugFS Info */ @@ -882,7 +886,7 @@ extern int amd_iommu_max_glx_val; * This function flushes all internal caches of * the IOMMU used by this driver. */ -extern void iommu_flush_all_caches(struct amd_iommu *iommu); +void iommu_flush_all_caches(struct amd_iommu *iommu); static inline int get_ioapic_devid(int id) { @@ -1006,7 +1010,6 @@ struct amd_ir_data { struct irq_2_irte irq_2_irte; struct msi_msg msi_entry; void *entry; /* Pointer to union irte or struct irte_ga */ - void *ref; /* Pointer to the actual irte */ /** * Store information for activate/de-activate diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index c2d80a4e5fb0..ea0f1ab94178 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -162,6 +162,7 @@ static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; static bool amd_iommu_detected; static bool amd_iommu_disabled __initdata; static bool amd_iommu_force_enable __initdata; +static bool amd_iommu_irtcachedis; static int amd_iommu_target_ivhd_type; /* Global EFR and EFR2 registers */ @@ -484,6 +485,9 @@ static void iommu_disable(struct amd_iommu *iommu) /* Disable IOMMU hardware itself */ iommu_feature_disable(iommu, CONTROL_IOMMU_EN); + + /* Clear IRTE cache disabling bit */ + iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS); } /* @@ -1753,7 +1757,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, iommu->pci_seg = pci_seg; raw_spin_lock_init(&iommu->lock); - iommu->cmd_sem_val = 0; + atomic64_set(&iommu->cmd_sem_val, 0); /* Add IOMMU to internal data structures */ list_add_tail(&iommu->list, &amd_iommu_list); @@ -2710,6 +2714,33 @@ static void iommu_enable_ga(struct amd_iommu *iommu) #endif } +static void iommu_disable_irtcachedis(struct amd_iommu *iommu) +{ + iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS); +} + +static void iommu_enable_irtcachedis(struct amd_iommu *iommu) +{ + u64 ctrl; + + if (!amd_iommu_irtcachedis) + return; + + /* + * Note: + * The support for IRTCacheDis feature is dertermined by + * checking if the bit is writable. + */ + iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS); + ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl &= (1ULL << CONTROL_IRTCACHEDIS); + if (ctrl) + iommu->irtcachedis_enabled = true; + pr_info("iommu%d (%#06x) : IRT cache is %s\n", + iommu->index, iommu->devid, + iommu->irtcachedis_enabled ? "disabled" : "enabled"); +} + static void early_enable_iommu(struct amd_iommu *iommu) { iommu_disable(iommu); @@ -2720,6 +2751,7 @@ static void early_enable_iommu(struct amd_iommu *iommu) iommu_set_exclusion_range(iommu); iommu_enable_ga(iommu); iommu_enable_xt(iommu); + iommu_enable_irtcachedis(iommu); iommu_enable(iommu); iommu_flush_all_caches(iommu); } @@ -2770,10 +2802,12 @@ static void early_enable_iommus(void) for_each_iommu(iommu) { iommu_disable_command_buffer(iommu); iommu_disable_event_buffer(iommu); + iommu_disable_irtcachedis(iommu); iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); iommu_enable_ga(iommu); iommu_enable_xt(iommu); + iommu_enable_irtcachedis(iommu); iommu_set_device_table(iommu); iommu_flush_all_caches(iommu); } @@ -3426,6 +3460,8 @@ static int __init parse_amd_iommu_options(char *str) amd_iommu_pgtable = AMD_IOMMU_V1; } else if (strncmp(str, "pgtbl_v2", 8) == 0) { amd_iommu_pgtable = AMD_IOMMU_V2; + } else if (strncmp(str, "irtcachedis", 11) == 0) { + amd_iommu_irtcachedis = true; } else { pr_notice("Unknown option - '%s'\n", str); } diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 1b67116882be..2892aa1b4dc1 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -310,8 +310,8 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable, return NULL; /* Large PTE */ - if (PM_PTE_LEVEL(*pte) == 7 || - PM_PTE_LEVEL(*pte) == 0) + if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL || + PM_PTE_LEVEL(*pte) == PAGE_MODE_NONE) break; /* No level skipping support yet */ diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index 27c3015947e6..e9ef2e0a62f6 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -2,7 +2,7 @@ /* * CPU-agnostic AMD IO page table v2 allocator. * - * Copyright (C) 2022 Advanced Micro Devices, Inc. + * Copyright (C) 2022, 2023 Advanced Micro Devices, Inc. * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> * Author: Vasant Hegde <vasant.hegde@amd.com> */ diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 9ea40960978b..c3b58a8389b9 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1182,11 +1182,11 @@ static int iommu_completion_wait(struct amd_iommu *iommu) if (!iommu->need_sync) return 0; - raw_spin_lock_irqsave(&iommu->lock, flags); - - data = ++iommu->cmd_sem_val; + data = atomic64_add_return(1, &iommu->cmd_sem_val); build_completion_wait(&cmd, iommu, data); + raw_spin_lock_irqsave(&iommu->lock, flags); + ret = __iommu_queue_command_sync(iommu, &cmd, false); if (ret) goto out_unlock; @@ -1273,6 +1273,9 @@ static void amd_iommu_flush_irt_all(struct amd_iommu *iommu) u32 devid; u16 last_bdf = iommu->pci_seg->last_bdf; + if (iommu->irtcachedis_enabled) + return; + for (devid = 0; devid <= last_bdf; devid++) iommu_flush_irt(iommu, devid); @@ -2313,6 +2316,8 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap) return amdr_ivrs_remap_support; case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: return true; + case IOMMU_CAP_DEFERRED_FLUSH: + return true; default: break; } @@ -2822,6 +2827,32 @@ EXPORT_SYMBOL(amd_iommu_device_info); static struct irq_chip amd_ir_chip; static DEFINE_SPINLOCK(iommu_table_lock); +static void iommu_flush_irt_and_complete(struct amd_iommu *iommu, u16 devid) +{ + int ret; + u64 data; + unsigned long flags; + struct iommu_cmd cmd, cmd2; + + if (iommu->irtcachedis_enabled) + return; + + build_inv_irt(&cmd, devid); + data = atomic64_add_return(1, &iommu->cmd_sem_val); + build_completion_wait(&cmd2, iommu, data); + + raw_spin_lock_irqsave(&iommu->lock, flags); + ret = __iommu_queue_command_sync(iommu, &cmd, true); + if (ret) + goto out; + ret = __iommu_queue_command_sync(iommu, &cmd2, false); + if (ret) + goto out; + wait_on_sem(iommu, data); +out: + raw_spin_unlock_irqrestore(&iommu->lock, flags); +} + static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid, struct irq_remap_table *table) { @@ -3021,7 +3052,7 @@ out: } static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index, - struct irte_ga *irte, struct amd_ir_data *data) + struct irte_ga *irte) { struct irq_remap_table *table; struct irte_ga *entry; @@ -3046,13 +3077,9 @@ static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index, old = entry->irte; WARN_ON(!try_cmpxchg128(&entry->irte, &old, irte->irte)); - if (data) - data->ref = entry; - raw_spin_unlock_irqrestore(&table->lock, flags); - iommu_flush_irt(iommu, devid); - iommu_completion_wait(iommu); + iommu_flush_irt_and_complete(iommu, devid); return 0; } @@ -3071,8 +3098,7 @@ static int modify_irte(struct amd_iommu *iommu, table->table[index] = irte->val; raw_spin_unlock_irqrestore(&table->lock, flags); - iommu_flush_irt(iommu, devid); - iommu_completion_wait(iommu); + iommu_flush_irt_and_complete(iommu, devid); return 0; } @@ -3090,8 +3116,7 @@ static void free_irte(struct amd_iommu *iommu, u16 devid, int index) iommu->irte_ops->clear_allocated(table, index); raw_spin_unlock_irqrestore(&table->lock, flags); - iommu_flush_irt(iommu, devid); - iommu_completion_wait(iommu); + iommu_flush_irt_and_complete(iommu, devid); } static void irte_prepare(void *entry, @@ -3137,7 +3162,7 @@ static void irte_ga_activate(struct amd_iommu *iommu, void *entry, u16 devid, u1 struct irte_ga *irte = (struct irte_ga *) entry; irte->lo.fields_remap.valid = 1; - modify_irte_ga(iommu, devid, index, irte, NULL); + modify_irte_ga(iommu, devid, index, irte); } static void irte_deactivate(struct amd_iommu *iommu, void *entry, u16 devid, u16 index) @@ -3153,7 +3178,7 @@ static void irte_ga_deactivate(struct amd_iommu *iommu, void *entry, u16 devid, struct irte_ga *irte = (struct irte_ga *) entry; irte->lo.fields_remap.valid = 0; - modify_irte_ga(iommu, devid, index, irte, NULL); + modify_irte_ga(iommu, devid, index, irte); } static void irte_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid, u16 index, @@ -3177,7 +3202,7 @@ static void irte_ga_set_affinity(struct amd_iommu *iommu, void *entry, u16 devid APICID_TO_IRTE_DEST_LO(dest_apicid); irte->hi.fields.destination = APICID_TO_IRTE_DEST_HI(dest_apicid); - modify_irte_ga(iommu, devid, index, irte, NULL); + modify_irte_ga(iommu, devid, index, irte); } } @@ -3527,7 +3552,7 @@ int amd_iommu_activate_guest_mode(void *data) entry->lo.fields_vapic.ga_tag = ir_data->ga_tag; return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid, - ir_data->irq_2_irte.index, entry, ir_data); + ir_data->irq_2_irte.index, entry); } EXPORT_SYMBOL(amd_iommu_activate_guest_mode); @@ -3557,7 +3582,7 @@ int amd_iommu_deactivate_guest_mode(void *data) APICID_TO_IRTE_DEST_HI(cfg->dest_apicid); return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid, - ir_data->irq_2_irte.index, entry, ir_data); + ir_data->irq_2_irte.index, entry); } EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode); @@ -3719,44 +3744,26 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu) int amd_iommu_update_ga(int cpu, bool is_run, void *data) { - unsigned long flags; - struct amd_iommu *iommu; - struct irq_remap_table *table; struct amd_ir_data *ir_data = (struct amd_ir_data *)data; - int devid = ir_data->irq_2_irte.devid; struct irte_ga *entry = (struct irte_ga *) ir_data->entry; - struct irte_ga *ref = (struct irte_ga *) ir_data->ref; if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || - !ref || !entry || !entry->lo.fields_vapic.guest_mode) + !entry || !entry->lo.fields_vapic.guest_mode) return 0; - iommu = ir_data->iommu; - if (!iommu) + if (!ir_data->iommu) return -ENODEV; - table = get_irq_table(iommu, devid); - if (!table) - return -ENODEV; - - raw_spin_lock_irqsave(&table->lock, flags); - - if (ref->lo.fields_vapic.guest_mode) { - if (cpu >= 0) { - ref->lo.fields_vapic.destination = - APICID_TO_IRTE_DEST_LO(cpu); - ref->hi.fields.destination = - APICID_TO_IRTE_DEST_HI(cpu); - } - ref->lo.fields_vapic.is_run = is_run; - barrier(); + if (cpu >= 0) { + entry->lo.fields_vapic.destination = + APICID_TO_IRTE_DEST_LO(cpu); + entry->hi.fields.destination = + APICID_TO_IRTE_DEST_HI(cpu); } + entry->lo.fields_vapic.is_run = is_run; - raw_spin_unlock_irqrestore(&table->lock, flags); - - iommu_flush_irt(iommu, devid); - iommu_completion_wait(iommu); - return 0; + return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid, + ir_data->irq_2_irte.index, entry); } EXPORT_SYMBOL(amd_iommu_update_ga); #endif diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 3fd83fb75722..9b0dc3505601 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -894,6 +894,12 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, { int index; + if (cmds->num == CMDQ_BATCH_ENTRIES - 1 && + (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) { + arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true); + cmds->num = 0; + } + if (cmds->num == CMDQ_BATCH_ENTRIES) { arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false); cmds->num = 0; @@ -1892,8 +1898,13 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, /* Convert page size of 12,14,16 (log2) to 1,2,3 */ cmd->tlbi.tg = (tg - 10) / 2; - /* Determine what level the granule is at */ - cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3)); + /* + * Determine what level the granule is at. For non-leaf, io-pgtable + * assumes .tlb_flush_walk can invalidate multiple levels at once, + * so ignore the nominal last-level granule and leave TTL=0. + */ + if (cmd->tlbi.leaf) + cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3)); num_pages = size >> tg; } @@ -2008,6 +2019,7 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) /* Assume that a coherent TCU implies coherent TBUs */ return master->smmu->features & ARM_SMMU_FEAT_COHERENCY; case IOMMU_CAP_NOEXEC: + case IOMMU_CAP_DEFERRED_FLUSH: return true; default: return false; @@ -2023,7 +2035,6 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA && - type != IOMMU_DOMAIN_DMA_FQ && type != IOMMU_DOMAIN_IDENTITY) return NULL; @@ -3429,6 +3440,44 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) return 0; } +#define IIDR_IMPLEMENTER_ARM 0x43b +#define IIDR_PRODUCTID_ARM_MMU_600 0x483 +#define IIDR_PRODUCTID_ARM_MMU_700 0x487 + +static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu) +{ + u32 reg; + unsigned int implementer, productid, variant, revision; + + reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR); + implementer = FIELD_GET(IIDR_IMPLEMENTER, reg); + productid = FIELD_GET(IIDR_PRODUCTID, reg); + variant = FIELD_GET(IIDR_VARIANT, reg); + revision = FIELD_GET(IIDR_REVISION, reg); + + switch (implementer) { + case IIDR_IMPLEMENTER_ARM: + switch (productid) { + case IIDR_PRODUCTID_ARM_MMU_600: + /* Arm erratum 1076982 */ + if (variant == 0 && revision <= 2) + smmu->features &= ~ARM_SMMU_FEAT_SEV; + /* Arm erratum 1209401 */ + if (variant < 2) + smmu->features &= ~ARM_SMMU_FEAT_NESTING; + break; + case IIDR_PRODUCTID_ARM_MMU_700: + /* Arm erratum 2812531 */ + smmu->features &= ~ARM_SMMU_FEAT_BTM; + smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC; + /* Arm errata 2268618, 2812531 */ + smmu->features &= ~ARM_SMMU_FEAT_NESTING; + break; + } + break; + } +} + static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) { u32 reg; @@ -3635,6 +3684,12 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) smmu->ias = max(smmu->ias, smmu->oas); + if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) && + (smmu->features & ARM_SMMU_FEAT_TRANS_S2)) + smmu->features |= ARM_SMMU_FEAT_NESTING; + + arm_smmu_device_iidr_probe(smmu); + if (arm_smmu_sva_supported(smmu)) smmu->features |= ARM_SMMU_FEAT_SVA; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index b574c58a3487..dcab85698a4e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -69,6 +69,12 @@ #define IDR5_VAX GENMASK(11, 10) #define IDR5_VAX_52_BIT 1 +#define ARM_SMMU_IIDR 0x18 +#define IIDR_PRODUCTID GENMASK(31, 20) +#define IIDR_VARIANT GENMASK(19, 16) +#define IIDR_REVISION GENMASK(15, 12) +#define IIDR_IMPLEMENTER GENMASK(11, 0) + #define ARM_SMMU_CR0 0x20 #define CR0_ATSCHK (1 << 4) #define CR0_CMDQEN (1 << 3) @@ -639,11 +645,13 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_BTM (1 << 16) #define ARM_SMMU_FEAT_SVA (1 << 17) #define ARM_SMMU_FEAT_E2H (1 << 18) +#define ARM_SMMU_FEAT_NESTING (1 << 19) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) #define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1) #define ARM_SMMU_OPT_MSIPOLL (1 << 2) +#define ARM_SMMU_OPT_CMDQ_FORCE_SYNC (1 << 3) u32 options; struct arm_smmu_cmdq cmdq; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 6e0813b26fb6..a86acd76c1df 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -856,8 +856,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) struct arm_smmu_domain *smmu_domain; if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_IDENTITY) { - if (using_legacy_binding || - (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_DMA_FQ)) + if (using_legacy_binding || type != IOMMU_DOMAIN_DMA) return NULL; } /* @@ -1325,6 +1324,7 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK || device_get_dma_attr(dev) == DEV_DMA_COHERENT; case IOMMU_CAP_NOEXEC: + case IOMMU_CAP_DEFERRED_FLUSH: return true; default: return false; diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index e86ae462cade..e57724163835 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -615,7 +615,8 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, goto done_unlock; /* If the FQ fails we can simply fall back to strict mode */ - if (domain->type == IOMMU_DOMAIN_DMA_FQ && iommu_dma_init_fq(domain)) + if (domain->type == IOMMU_DOMAIN_DMA_FQ && + (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain))) domain->type = IOMMU_DOMAIN_DMA; ret = iova_reserve_iommu_regions(dev, domain); diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index bce372297099..4ac0e247ec2b 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -334,17 +334,6 @@ int fsl_pamu_configure_l1_stash(struct iommu_domain *domain, u32 cpu) return ret; } -static struct iommu_group *get_device_iommu_group(struct device *dev) -{ - struct iommu_group *group; - - group = iommu_group_get(dev); - if (!group) - group = iommu_group_alloc(); - - return group; -} - static bool check_pci_ctl_endpt_part(struct pci_controller *pci_ctl) { u32 version; @@ -356,94 +345,52 @@ static bool check_pci_ctl_endpt_part(struct pci_controller *pci_ctl) return version >= 0x204; } -/* Get iommu group information from peer devices or devices on the parent bus */ -static struct iommu_group *get_shared_pci_device_group(struct pci_dev *pdev) +static struct iommu_group *fsl_pamu_device_group(struct device *dev) { - struct pci_dev *tmp; struct iommu_group *group; - struct pci_bus *bus = pdev->bus; + struct pci_dev *pdev; /* - * Traverese the pci bus device list to get - * the shared iommu group. + * For platform devices we allocate a separate group for each of the + * devices. */ - while (bus) { - list_for_each_entry(tmp, &bus->devices, bus_list) { - if (tmp == pdev) - continue; - group = iommu_group_get(&tmp->dev); - if (group) - return group; - } + if (!dev_is_pci(dev)) + return generic_device_group(dev); - bus = bus->parent; - } - - return NULL; -} - -static struct iommu_group *get_pci_device_group(struct pci_dev *pdev) -{ - struct pci_controller *pci_ctl; - bool pci_endpt_partitioning; - struct iommu_group *group = NULL; - - pci_ctl = pci_bus_to_host(pdev->bus); - pci_endpt_partitioning = check_pci_ctl_endpt_part(pci_ctl); - /* We can partition PCIe devices so assign device group to the device */ - if (pci_endpt_partitioning) { - group = pci_device_group(&pdev->dev); - - /* - * PCIe controller is not a paritionable entity - * free the controller device iommu_group. - */ - if (pci_ctl->parent->iommu_group) - iommu_group_remove_device(pci_ctl->parent); - } else { - /* - * All devices connected to the controller will share the - * PCI controllers device group. If this is the first - * device to be probed for the pci controller, copy the - * device group information from the PCI controller device - * node and remove the PCI controller iommu group. - * For subsequent devices, the iommu group information can - * be obtained from sibling devices (i.e. from the bus_devices - * link list). - */ - if (pci_ctl->parent->iommu_group) { - group = get_device_iommu_group(pci_ctl->parent); - iommu_group_remove_device(pci_ctl->parent); - } else { - group = get_shared_pci_device_group(pdev); - } - } - - if (!group) - group = ERR_PTR(-ENODEV); + /* + * We can partition PCIe devices so assign device group to the device + */ + pdev = to_pci_dev(dev); + if (check_pci_ctl_endpt_part(pci_bus_to_host(pdev->bus))) + return pci_device_group(&pdev->dev); + /* + * All devices connected to the controller will share the same device + * group. + * + * Due to ordering between fsl_pamu_init() and fsl_pci_init() it is + * guaranteed that the pci_ctl->parent platform_device will have the + * iommu driver bound and will already have a group set. So we just + * re-use this group as the group for every device in the hose. + */ + group = iommu_group_get(pci_bus_to_host(pdev->bus)->parent); + if (WARN_ON(!group)) + return ERR_PTR(-EINVAL); return group; } -static struct iommu_group *fsl_pamu_device_group(struct device *dev) +static struct iommu_device *fsl_pamu_probe_device(struct device *dev) { - struct iommu_group *group = ERR_PTR(-ENODEV); int len; /* - * For platform devices we allocate a separate group for - * each of the devices. + * uboot must fill the fsl,liodn for platform devices to be supported by + * the iommu. */ - if (dev_is_pci(dev)) - group = get_pci_device_group(to_pci_dev(dev)); - else if (of_get_property(dev->of_node, "fsl,liodn", &len)) - group = get_device_iommu_group(dev); - - return group; -} + if (!dev_is_pci(dev) && + !of_get_property(dev->of_node, "fsl,liodn", &len)) + return ERR_PTR(-ENODEV); -static struct iommu_device *fsl_pamu_probe_device(struct device *dev) -{ return &pamu_iommu; } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b871a6afd803..5c8c5cdc36cf 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1185,7 +1185,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) { struct root_entry *root; - root = (struct root_entry *)alloc_pgtable_page(iommu->node, GFP_ATOMIC); + root = alloc_pgtable_page(iommu->node, GFP_ATOMIC); if (!root) { pr_err("Allocating root entry for %s failed\n", iommu->name); @@ -1312,15 +1312,7 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, iommu->name, type); return; } - /* Note: set drain read/write */ -#if 0 - /* - * This is probably to be super secure.. Looks like we can - * ignore it without any impact. - */ - if (cap_read_drain(iommu->cap)) - val |= DMA_TLB_READ_DRAIN; -#endif + if (cap_write_drain(iommu->cap)) val |= DMA_TLB_WRITE_DRAIN; @@ -1897,8 +1889,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, struct context_entry *context; int ret; - WARN_ON(did == 0); - if (hw_pass_through && domain_type_is_si(domain)) translation = CONTEXT_TT_PASS_THROUGH; @@ -1944,8 +1934,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, if (sm_supported(iommu)) { unsigned long pds; - WARN_ON(!table); - /* Setup the PASID DIR pointer: */ pds = context_get_sm_pds(table); context->lo = (u64)virt_to_phys(table->table) | @@ -2967,10 +2955,15 @@ static int init_iommu_hw(void) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu = NULL; + int ret; - for_each_active_iommu(iommu, drhd) - if (iommu->qi) - dmar_reenable_qi(iommu); + for_each_active_iommu(iommu, drhd) { + if (iommu->qi) { + ret = dmar_reenable_qi(iommu); + if (ret) + return ret; + } + } for_each_iommu(iommu, drhd) { if (drhd->ignored) { @@ -4064,7 +4057,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) case IOMMU_DOMAIN_BLOCKED: return &blocking_domain; case IOMMU_DOMAIN_DMA: - case IOMMU_DOMAIN_DMA_FQ: case IOMMU_DOMAIN_UNMANAGED: dmar_domain = alloc_domain(type); if (!dmar_domain) { @@ -4369,6 +4361,7 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap) switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: + case IOMMU_CAP_DEFERRED_FLUSH: return true; case IOMMU_CAP_PRE_BOOT_PROTECTION: return dmar_platform_optin(); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index eb620552967b..da340f11c5f5 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -68,6 +68,10 @@ struct group_device { char *name; }; +/* Iterate over each struct group_device in a struct iommu_group */ +#define for_each_group_device(group, pos) \ + list_for_each_entry(pos, &(group)->devices, list) + struct iommu_group_attribute { struct attribute attr; ssize_t (*show)(struct iommu_group *group, char *buf); @@ -89,17 +93,39 @@ static const char * const iommu_group_resv_type_string[] = { static int iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data); static void iommu_release_device(struct device *dev); -static int iommu_alloc_default_domain(struct iommu_group *group, - struct device *dev); static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, unsigned type); static int __iommu_attach_device(struct iommu_domain *domain, struct device *dev); static int __iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group); + +enum { + IOMMU_SET_DOMAIN_MUST_SUCCEED = 1 << 0, +}; + +static int __iommu_device_set_domain(struct iommu_group *group, + struct device *dev, + struct iommu_domain *new_domain, + unsigned int flags); +static int __iommu_group_set_domain_internal(struct iommu_group *group, + struct iommu_domain *new_domain, + unsigned int flags); static int __iommu_group_set_domain(struct iommu_group *group, - struct iommu_domain *new_domain); -static int iommu_create_device_direct_mappings(struct iommu_group *group, + struct iommu_domain *new_domain) +{ + return __iommu_group_set_domain_internal(group, new_domain, 0); +} +static void __iommu_group_set_domain_nofail(struct iommu_group *group, + struct iommu_domain *new_domain) +{ + WARN_ON(__iommu_group_set_domain_internal( + group, new_domain, IOMMU_SET_DOMAIN_MUST_SUCCEED)); +} + +static int iommu_setup_default_domain(struct iommu_group *group, + int target_type); +static int iommu_create_device_direct_mappings(struct iommu_domain *domain, struct device *dev); static struct iommu_group *iommu_group_get_for_dev(struct device *dev); static ssize_t iommu_group_store_type(struct iommu_group *group, @@ -176,16 +202,16 @@ static int __init iommu_subsys_init(void) if (!iommu_default_passthrough() && !iommu_dma_strict) iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ; - pr_info("Default domain type: %s %s\n", + pr_info("Default domain type: %s%s\n", iommu_domain_type_str(iommu_def_domain_type), (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ? - "(set via kernel command line)" : ""); + " (set via kernel command line)" : ""); if (!iommu_default_passthrough()) - pr_info("DMA domain TLB invalidation policy: %s mode %s\n", + pr_info("DMA domain TLB invalidation policy: %s mode%s\n", iommu_dma_strict ? "strict" : "lazy", (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ? - "(set via kernel command line)" : ""); + " (set via kernel command line)" : ""); nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL); if (!nb) @@ -343,6 +369,8 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list dev->iommu->iommu_dev = iommu_dev; dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); + if (ops->is_attach_deferred) + dev->iommu->attach_deferred = ops->is_attach_deferred(dev); group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) { @@ -377,30 +405,6 @@ err_unlock: return ret; } -static bool iommu_is_attach_deferred(struct device *dev) -{ - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (ops->is_attach_deferred) - return ops->is_attach_deferred(dev); - - return false; -} - -static int iommu_group_do_dma_first_attach(struct device *dev, void *data) -{ - struct iommu_domain *domain = data; - - lockdep_assert_held(&dev->iommu_group->mutex); - - if (iommu_is_attach_deferred(dev)) { - dev->iommu->attach_deferred = 1; - return 0; - } - - return __iommu_attach_device(domain, dev); -} - int iommu_probe_device(struct device *dev) { const struct iommu_ops *ops; @@ -417,29 +421,20 @@ int iommu_probe_device(struct device *dev) goto err_release; } - /* - * Try to allocate a default domain - needs support from the - * IOMMU driver. There are still some drivers which don't - * support default domains, so the return value is not yet - * checked. - */ mutex_lock(&group->mutex); - iommu_alloc_default_domain(group, dev); - /* - * If device joined an existing group which has been claimed, don't - * attach the default domain. - */ - if (group->default_domain && !group->owner) { - ret = iommu_group_do_dma_first_attach(dev, group->default_domain); - if (ret) { - mutex_unlock(&group->mutex); - iommu_group_put(group); - goto err_release; - } - } + if (group->default_domain) + iommu_create_device_direct_mappings(group->default_domain, dev); - iommu_create_device_direct_mappings(group, dev); + if (group->domain) { + ret = __iommu_device_set_domain(group, dev, group->domain, 0); + if (ret) + goto err_unlock; + } else if (!group->default_domain) { + ret = iommu_setup_default_domain(group, 0); + if (ret) + goto err_unlock; + } mutex_unlock(&group->mutex); iommu_group_put(group); @@ -450,6 +445,9 @@ int iommu_probe_device(struct device *dev) return 0; +err_unlock: + mutex_unlock(&group->mutex); + iommu_group_put(group); err_release: iommu_release_device(dev); @@ -468,7 +466,7 @@ __iommu_group_remove_device(struct iommu_group *group, struct device *dev) struct group_device *device; lockdep_assert_held(&group->mutex); - list_for_each_entry(device, &group->devices, list) { + for_each_group_device(group, device) { if (device->dev == dev) { list_del(&device->list); return device; @@ -707,7 +705,7 @@ int iommu_get_group_resv_regions(struct iommu_group *group, int ret = 0; mutex_lock(&group->mutex); - list_for_each_entry(device, &group->devices, list) { + for_each_group_device(group, device) { struct list_head dev_resv_regions; /* @@ -953,16 +951,15 @@ int iommu_group_set_name(struct iommu_group *group, const char *name) } EXPORT_SYMBOL_GPL(iommu_group_set_name); -static int iommu_create_device_direct_mappings(struct iommu_group *group, +static int iommu_create_device_direct_mappings(struct iommu_domain *domain, struct device *dev) { - struct iommu_domain *domain = group->default_domain; struct iommu_resv_region *entry; struct list_head mappings; unsigned long pg_size; int ret = 0; - if (!domain || !iommu_is_dma_domain(domain)) + if (!iommu_is_dma_domain(domain)) return 0; BUG_ON(!domain->pgsize_bitmap); @@ -1069,25 +1066,13 @@ rename: mutex_lock(&group->mutex); list_add_tail(&device->list, &group->devices); - if (group->domain) - ret = iommu_group_do_dma_first_attach(dev, group->domain); mutex_unlock(&group->mutex); - if (ret) - goto err_put_group; - trace_add_device_to_group(group->id, dev); dev_info(dev, "Adding to iommu group %d\n", group->id); return 0; -err_put_group: - mutex_lock(&group->mutex); - list_del(&device->list); - mutex_unlock(&group->mutex); - dev->iommu_group = NULL; - kobject_put(group->devices_kobj); - sysfs_remove_link(group->devices_kobj, device->name); err_free_name: kfree(device->name); err_remove_link: @@ -1125,31 +1110,6 @@ void iommu_group_remove_device(struct device *dev) } EXPORT_SYMBOL_GPL(iommu_group_remove_device); -static int iommu_group_device_count(struct iommu_group *group) -{ - struct group_device *entry; - int ret = 0; - - list_for_each_entry(entry, &group->devices, list) - ret++; - - return ret; -} - -static int __iommu_group_for_each_dev(struct iommu_group *group, void *data, - int (*fn)(struct device *, void *)) -{ - struct group_device *device; - int ret = 0; - - list_for_each_entry(device, &group->devices, list) { - ret = fn(device->dev, data); - if (ret) - break; - } - return ret; -} - /** * iommu_group_for_each_dev - iterate over each device in the group * @group: the group @@ -1164,10 +1124,15 @@ static int __iommu_group_for_each_dev(struct iommu_group *group, void *data, int iommu_group_for_each_dev(struct iommu_group *group, void *data, int (*fn)(struct device *, void *)) { - int ret; + struct group_device *device; + int ret = 0; mutex_lock(&group->mutex); - ret = __iommu_group_for_each_dev(group, data, fn); + for_each_group_device(group, device) { + ret = fn(device->dev, data); + if (ret) + break; + } mutex_unlock(&group->mutex); return ret; @@ -1656,40 +1621,47 @@ static int iommu_get_def_domain_type(struct device *dev) return 0; } -static int iommu_group_alloc_default_domain(const struct bus_type *bus, - struct iommu_group *group, - unsigned int type) +static struct iommu_domain * +__iommu_group_alloc_default_domain(const struct bus_type *bus, + struct iommu_group *group, int req_type) { - struct iommu_domain *dom; - - dom = __iommu_domain_alloc(bus, type); - if (!dom && type != IOMMU_DOMAIN_DMA) { - dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA); - if (dom) - pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA", - type, group->name); - } - - if (!dom) - return -ENOMEM; - - group->default_domain = dom; - if (!group->domain) - group->domain = dom; - return 0; + if (group->default_domain && group->default_domain->type == req_type) + return group->default_domain; + return __iommu_domain_alloc(bus, req_type); } -static int iommu_alloc_default_domain(struct iommu_group *group, - struct device *dev) +/* + * req_type of 0 means "auto" which means to select a domain based on + * iommu_def_domain_type or what the driver actually supports. + */ +static struct iommu_domain * +iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) { - unsigned int type; + const struct bus_type *bus = + list_first_entry(&group->devices, struct group_device, list) + ->dev->bus; + struct iommu_domain *dom; - if (group->default_domain) - return 0; + lockdep_assert_held(&group->mutex); + + if (req_type) + return __iommu_group_alloc_default_domain(bus, group, req_type); - type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type; + /* The driver gave no guidance on what type to use, try the default */ + dom = __iommu_group_alloc_default_domain(bus, group, iommu_def_domain_type); + if (dom) + return dom; - return iommu_group_alloc_default_domain(dev->bus, group, type); + /* Otherwise IDENTITY and DMA_FQ defaults will try DMA */ + if (iommu_def_domain_type == IOMMU_DOMAIN_DMA) + return NULL; + dom = __iommu_group_alloc_default_domain(bus, group, IOMMU_DOMAIN_DMA); + if (!dom) + return NULL; + + pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA", + iommu_def_domain_type, group->name); + return dom; } /** @@ -1774,87 +1746,48 @@ static int iommu_bus_notifier(struct notifier_block *nb, return 0; } -struct __group_domain_type { - struct device *dev; - unsigned int type; -}; - -static int probe_get_default_domain_type(struct device *dev, void *data) -{ - struct __group_domain_type *gtype = data; - unsigned int type = iommu_get_def_domain_type(dev); - - if (type) { - if (gtype->type && gtype->type != type) { - dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n", - iommu_domain_type_str(type), - dev_name(gtype->dev), - iommu_domain_type_str(gtype->type)); - gtype->type = 0; - } - - if (!gtype->dev) { - gtype->dev = dev; - gtype->type = type; - } - } - - return 0; -} - -static void probe_alloc_default_domain(const struct bus_type *bus, - struct iommu_group *group) +/* A target_type of 0 will select the best domain type and cannot fail */ +static int iommu_get_default_domain_type(struct iommu_group *group, + int target_type) { - struct __group_domain_type gtype; + int best_type = target_type; + struct group_device *gdev; + struct device *last_dev; - memset(>ype, 0, sizeof(gtype)); - - /* Ask for default domain requirements of all devices in the group */ - __iommu_group_for_each_dev(group, >ype, - probe_get_default_domain_type); - - if (!gtype.type) - gtype.type = iommu_def_domain_type; + lockdep_assert_held(&group->mutex); - iommu_group_alloc_default_domain(bus, group, gtype.type); + for_each_group_device(group, gdev) { + unsigned int type = iommu_get_def_domain_type(gdev->dev); -} + if (best_type && type && best_type != type) { + if (target_type) { + dev_err_ratelimited( + gdev->dev, + "Device cannot be in %s domain\n", + iommu_domain_type_str(target_type)); + return -1; + } -static int __iommu_group_dma_first_attach(struct iommu_group *group) -{ - return __iommu_group_for_each_dev(group, group->default_domain, - iommu_group_do_dma_first_attach); + dev_warn( + gdev->dev, + "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n", + iommu_domain_type_str(type), dev_name(last_dev), + iommu_domain_type_str(best_type)); + return 0; + } + if (!best_type) + best_type = type; + last_dev = gdev->dev; + } + return best_type; } -static int iommu_group_do_probe_finalize(struct device *dev, void *data) +static void iommu_group_do_probe_finalize(struct device *dev) { const struct iommu_ops *ops = dev_iommu_ops(dev); if (ops->probe_finalize) ops->probe_finalize(dev); - - return 0; -} - -static void __iommu_group_dma_finalize(struct iommu_group *group) -{ - __iommu_group_for_each_dev(group, group->default_domain, - iommu_group_do_probe_finalize); -} - -static int iommu_do_create_direct_mappings(struct device *dev, void *data) -{ - struct iommu_group *group = data; - - iommu_create_device_direct_mappings(group, dev); - - return 0; -} - -static int iommu_group_create_direct_mappings(struct iommu_group *group) -{ - return __iommu_group_for_each_dev(group, group, - iommu_do_create_direct_mappings); } int bus_iommu_probe(const struct bus_type *bus) @@ -1873,32 +1806,31 @@ int bus_iommu_probe(const struct bus_type *bus) return ret; list_for_each_entry_safe(group, next, &group_list, entry) { + struct group_device *gdev; + mutex_lock(&group->mutex); /* Remove item from the list */ list_del_init(&group->entry); - /* Try to allocate default domain */ - probe_alloc_default_domain(bus, group); - - if (!group->default_domain) { + ret = iommu_setup_default_domain(group, 0); + if (ret) { mutex_unlock(&group->mutex); - continue; + return ret; } - - iommu_group_create_direct_mappings(group); - - ret = __iommu_group_dma_first_attach(group); - mutex_unlock(&group->mutex); - if (ret) - break; - - __iommu_group_dma_finalize(group); + /* + * FIXME: Mis-locked because the ops->probe_finalize() call-back + * of some IOMMU drivers calls arm_iommu_attach_device() which + * in-turn might call back into IOMMU core code, where it tries + * to take group->mutex, resulting in a deadlock. + */ + for_each_group_device(group, gdev) + iommu_group_do_probe_finalize(gdev->dev); } - return ret; + return 0; } bool iommu_present(const struct bus_type *bus) @@ -1946,7 +1878,7 @@ bool iommu_group_has_isolated_msi(struct iommu_group *group) bool ret = true; mutex_lock(&group->mutex); - list_for_each_entry(group_dev, &group->devices, list) + for_each_group_device(group, group_dev) ret &= msi_device_has_isolated_msi(group_dev->dev); mutex_unlock(&group->mutex); return ret; @@ -1980,11 +1912,12 @@ static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus, unsigned type) { struct iommu_domain *domain; + unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS; if (bus == NULL || bus->iommu_ops == NULL) return NULL; - domain = bus->iommu_ops->domain_alloc(type); + domain = bus->iommu_ops->domain_alloc(alloc_type); if (!domain) return NULL; @@ -2028,15 +1961,13 @@ EXPORT_SYMBOL_GPL(iommu_domain_free); static void __iommu_group_set_core_domain(struct iommu_group *group) { struct iommu_domain *new_domain; - int ret; if (group->owner) new_domain = group->blocking_domain; else new_domain = group->default_domain; - ret = __iommu_group_set_domain(group, new_domain); - WARN(ret, "iommu driver failed to attach the default/blocking domain"); + __iommu_group_set_domain_nofail(group, new_domain); } static int __iommu_attach_device(struct iommu_domain *domain, @@ -2082,7 +2013,7 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev) */ mutex_lock(&group->mutex); ret = -EINVAL; - if (iommu_group_device_count(group) != 1) + if (list_count_nodes(&group->devices) != 1) goto out_unlock; ret = __iommu_attach_group(domain, group); @@ -2113,7 +2044,7 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev) mutex_lock(&group->mutex); if (WARN_ON(domain != group->domain) || - WARN_ON(iommu_group_device_count(group) != 1)) + WARN_ON(list_count_nodes(&group->devices) != 1)) goto out_unlock; __iommu_group_set_core_domain(group); @@ -2149,52 +2080,14 @@ struct iommu_domain *iommu_get_dma_domain(struct device *dev) return dev->iommu_group->default_domain; } -/* - * IOMMU groups are really the natural working unit of the IOMMU, but - * the IOMMU API works on domains and devices. Bridge that gap by - * iterating over the devices in a group. Ideally we'd have a single - * device which represents the requestor ID of the group, but we also - * allow IOMMU drivers to create policy defined minimum sets, where - * the physical hardware may be able to distiguish members, but we - * wish to group them at a higher level (ex. untrusted multi-function - * PCI devices). Thus we attach each device. - */ -static int iommu_group_do_attach_device(struct device *dev, void *data) -{ - struct iommu_domain *domain = data; - - return __iommu_attach_device(domain, dev); -} - static int __iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) { - int ret; - if (group->domain && group->domain != group->default_domain && group->domain != group->blocking_domain) return -EBUSY; - ret = __iommu_group_for_each_dev(group, domain, - iommu_group_do_attach_device); - if (ret == 0) { - group->domain = domain; - } else { - /* - * To recover from the case when certain device within the - * group fails to attach to the new domain, we need force - * attaching all devices back to the old domain. The old - * domain is compatible for all devices in the group, - * hence the iommu driver should always return success. - */ - struct iommu_domain *old_domain = group->domain; - - group->domain = NULL; - WARN(__iommu_group_set_domain(group, old_domain), - "iommu driver failed to attach a compatible domain"); - } - - return ret; + return __iommu_group_set_domain(group, domain); } /** @@ -2221,21 +2114,61 @@ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_attach_group); -static int iommu_group_do_set_platform_dma(struct device *dev, void *data) +static int __iommu_device_set_domain(struct iommu_group *group, + struct device *dev, + struct iommu_domain *new_domain, + unsigned int flags) { - const struct iommu_ops *ops = dev_iommu_ops(dev); + int ret; - if (!WARN_ON(!ops->set_platform_dma_ops)) - ops->set_platform_dma_ops(dev); + if (dev->iommu->attach_deferred) { + if (new_domain == group->default_domain) + return 0; + dev->iommu->attach_deferred = 0; + } + ret = __iommu_attach_device(new_domain, dev); + if (ret) { + /* + * If we have a blocking domain then try to attach that in hopes + * of avoiding a UAF. Modern drivers should implement blocking + * domains as global statics that cannot fail. + */ + if ((flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) && + group->blocking_domain && + group->blocking_domain != new_domain) + __iommu_attach_device(group->blocking_domain, dev); + return ret; + } return 0; } -static int __iommu_group_set_domain(struct iommu_group *group, - struct iommu_domain *new_domain) +/* + * If 0 is returned the group's domain is new_domain. If an error is returned + * then the group's domain will be set back to the existing domain unless + * IOMMU_SET_DOMAIN_MUST_SUCCEED, otherwise an error is returned and the group's + * domains is left inconsistent. This is a driver bug to fail attach with a + * previously good domain. We try to avoid a kernel UAF because of this. + * + * IOMMU groups are really the natural working unit of the IOMMU, but the IOMMU + * API works on domains and devices. Bridge that gap by iterating over the + * devices in a group. Ideally we'd have a single device which represents the + * requestor ID of the group, but we also allow IOMMU drivers to create policy + * defined minimum sets, where the physical hardware may be able to distiguish + * members, but we wish to group them at a higher level (ex. untrusted + * multi-function PCI devices). Thus we attach each device. + */ +static int __iommu_group_set_domain_internal(struct iommu_group *group, + struct iommu_domain *new_domain, + unsigned int flags) { + struct group_device *last_gdev; + struct group_device *gdev; + int result; int ret; + lockdep_assert_held(&group->mutex); + if (group->domain == new_domain) return 0; @@ -2245,8 +2178,12 @@ static int __iommu_group_set_domain(struct iommu_group *group, * platform specific behavior. */ if (!new_domain) { - __iommu_group_for_each_dev(group, NULL, - iommu_group_do_set_platform_dma); + for_each_group_device(group, gdev) { + const struct iommu_ops *ops = dev_iommu_ops(gdev->dev); + + if (!WARN_ON(!ops->set_platform_dma_ops)) + ops->set_platform_dma_ops(gdev->dev); + } group->domain = NULL; return 0; } @@ -2256,16 +2193,52 @@ static int __iommu_group_set_domain(struct iommu_group *group, * domain. This switch does not have to be atomic and DMA can be * discarded during the transition. DMA must only be able to access * either new_domain or group->domain, never something else. - * - * Note that this is called in error unwind paths, attaching to a - * domain that has already been attached cannot fail. */ - ret = __iommu_group_for_each_dev(group, new_domain, - iommu_group_do_attach_device); - if (ret) - return ret; + result = 0; + for_each_group_device(group, gdev) { + ret = __iommu_device_set_domain(group, gdev->dev, new_domain, + flags); + if (ret) { + result = ret; + /* + * Keep trying the other devices in the group. If a + * driver fails attach to an otherwise good domain, and + * does not support blocking domains, it should at least + * drop its reference on the current domain so we don't + * UAF. + */ + if (flags & IOMMU_SET_DOMAIN_MUST_SUCCEED) + continue; + goto err_revert; + } + } group->domain = new_domain; - return 0; + return result; + +err_revert: + /* + * This is called in error unwind paths. A well behaved driver should + * always allow us to attach to a domain that was already attached. + */ + last_gdev = gdev; + for_each_group_device(group, gdev) { + const struct iommu_ops *ops = dev_iommu_ops(gdev->dev); + + /* + * If set_platform_dma_ops is not present a NULL domain can + * happen only for first probe, in which case we leave + * group->domain as NULL and let release clean everything up. + */ + if (group->domain) + WARN_ON(__iommu_device_set_domain( + group, gdev->dev, group->domain, + IOMMU_SET_DOMAIN_MUST_SUCCEED)); + else if (ops->set_platform_dma_ops) + ops->set_platform_dma_ops(gdev->dev); + if (gdev == last_gdev) + break; + } + return ret; } void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) @@ -2846,78 +2819,112 @@ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) } EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); -/* - * Changes the default domain of an iommu group - * - * @group: The group for which the default domain should be changed - * @dev: The first device in the group - * @type: The type of the new default domain that gets associated with the group - * - * Returns 0 on success and error code on failure +/** + * iommu_setup_default_domain - Set the default_domain for the group + * @group: Group to change + * @target_type: Domain type to set as the default_domain * - * Note: - * 1. Presently, this function is called only when user requests to change the - * group's default domain type through /sys/kernel/iommu_groups/<grp_id>/type - * Please take a closer look if intended to use for other purposes. + * Allocate a default domain and set it as the current domain on the group. If + * the group already has a default domain it will be changed to the target_type. + * When target_type is 0 the default domain is selected based on driver and + * system preferences. */ -static int iommu_change_dev_def_domain(struct iommu_group *group, - struct device *dev, int type) +static int iommu_setup_default_domain(struct iommu_group *group, + int target_type) { - struct __group_domain_type gtype = {NULL, 0}; - struct iommu_domain *prev_dom; + struct iommu_domain *old_dom = group->default_domain; + struct group_device *gdev; + struct iommu_domain *dom; + bool direct_failed; + int req_type; int ret; lockdep_assert_held(&group->mutex); - prev_dom = group->default_domain; - __iommu_group_for_each_dev(group, >ype, - probe_get_default_domain_type); - if (!type) { - /* - * If the user hasn't requested any specific type of domain and - * if the device supports both the domains, then default to the - * domain the device was booted with - */ - type = gtype.type ? : iommu_def_domain_type; - } else if (gtype.type && type != gtype.type) { - dev_err_ratelimited(dev, "Device cannot be in %s domain\n", - iommu_domain_type_str(type)); + req_type = iommu_get_default_domain_type(group, target_type); + if (req_type < 0) return -EINVAL; - } /* - * Switch to a new domain only if the requested domain type is different - * from the existing default domain type + * There are still some drivers which don't support default domains, so + * we ignore the failure and leave group->default_domain NULL. + * + * We assume that the iommu driver starts up the device in + * 'set_platform_dma_ops' mode if it does not support default domains. */ - if (prev_dom->type == type) + dom = iommu_group_alloc_default_domain(group, req_type); + if (!dom) { + /* Once in default_domain mode we never leave */ + if (group->default_domain) + return -ENODEV; + group->default_domain = NULL; return 0; + } - group->default_domain = NULL; - group->domain = NULL; - - /* Sets group->default_domain to the newly allocated domain */ - ret = iommu_group_alloc_default_domain(dev->bus, group, type); - if (ret) - goto restore_old_domain; - - ret = iommu_group_create_direct_mappings(group); - if (ret) - goto free_new_domain; - - ret = __iommu_attach_group(group->default_domain, group); - if (ret) - goto free_new_domain; + if (group->default_domain == dom) + return 0; - iommu_domain_free(prev_dom); + /* + * IOMMU_RESV_DIRECT and IOMMU_RESV_DIRECT_RELAXABLE regions must be + * mapped before their device is attached, in order to guarantee + * continuity with any FW activity + */ + direct_failed = false; + for_each_group_device(group, gdev) { + if (iommu_create_device_direct_mappings(dom, gdev->dev)) { + direct_failed = true; + dev_warn_once( + gdev->dev->iommu->iommu_dev->dev, + "IOMMU driver was not able to establish FW requested direct mapping."); + } + } - return 0; + /* We must set default_domain early for __iommu_device_set_domain */ + group->default_domain = dom; + if (!group->domain) { + /* + * Drivers are not allowed to fail the first domain attach. + * The only way to recover from this is to fail attaching the + * iommu driver and call ops->release_device. Put the domain + * in group->default_domain so it is freed after. + */ + ret = __iommu_group_set_domain_internal( + group, dom, IOMMU_SET_DOMAIN_MUST_SUCCEED); + if (WARN_ON(ret)) + goto out_free; + } else { + ret = __iommu_group_set_domain(group, dom); + if (ret) { + iommu_domain_free(dom); + group->default_domain = old_dom; + return ret; + } + } -free_new_domain: - iommu_domain_free(group->default_domain); -restore_old_domain: - group->default_domain = prev_dom; - group->domain = prev_dom; + /* + * Drivers are supposed to allow mappings to be installed in a domain + * before device attachment, but some don't. Hack around this defect by + * trying again after attaching. If this happens it means the device + * will not continuously have the IOMMU_RESV_DIRECT map. + */ + if (direct_failed) { + for_each_group_device(group, gdev) { + ret = iommu_create_device_direct_mappings(dom, gdev->dev); + if (ret) + goto err_restore; + } + } +err_restore: + if (old_dom) { + __iommu_group_set_domain_internal( + group, old_dom, IOMMU_SET_DOMAIN_MUST_SUCCEED); + iommu_domain_free(dom); + old_dom = NULL; + } +out_free: + if (old_dom) + iommu_domain_free(old_dom); return ret; } @@ -2933,8 +2940,7 @@ restore_old_domain: static ssize_t iommu_group_store_type(struct iommu_group *group, const char *buf, size_t count) { - struct group_device *grp_dev; - struct device *dev; + struct group_device *gdev; int ret, req_type; if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) @@ -2959,23 +2965,23 @@ static ssize_t iommu_group_store_type(struct iommu_group *group, if (req_type == IOMMU_DOMAIN_DMA_FQ && group->default_domain->type == IOMMU_DOMAIN_DMA) { ret = iommu_dma_init_fq(group->default_domain); - if (!ret) - group->default_domain->type = IOMMU_DOMAIN_DMA_FQ; - mutex_unlock(&group->mutex); + if (ret) + goto out_unlock; - return ret ?: count; + group->default_domain->type = IOMMU_DOMAIN_DMA_FQ; + ret = count; + goto out_unlock; } /* Otherwise, ensure that device exists and no driver is bound. */ if (list_empty(&group->devices) || group->owner_cnt) { - mutex_unlock(&group->mutex); - return -EPERM; + ret = -EPERM; + goto out_unlock; } - grp_dev = list_first_entry(&group->devices, struct group_device, list); - dev = grp_dev->dev; - - ret = iommu_change_dev_def_domain(group, dev, req_type); + ret = iommu_setup_default_domain(group, req_type); + if (ret) + goto out_unlock; /* * Release the mutex here because ops->probe_finalize() call-back of @@ -2986,9 +2992,12 @@ static ssize_t iommu_group_store_type(struct iommu_group *group, mutex_unlock(&group->mutex); /* Make sure dma_ops is appropriatley set */ - if (!ret) - __iommu_group_dma_finalize(group); + for_each_group_device(group, gdev) + iommu_group_do_probe_finalize(gdev->dev); + return count; +out_unlock: + mutex_unlock(&group->mutex); return ret ?: count; } @@ -3182,16 +3191,13 @@ EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner); static void __iommu_release_dma_ownership(struct iommu_group *group) { - int ret; - if (WARN_ON(!group->owner_cnt || !group->owner || !xa_empty(&group->pasid_array))) return; group->owner_cnt = 0; group->owner = NULL; - ret = __iommu_group_set_domain(group, group->default_domain); - WARN(ret, "iommu driver failed to attach the default domain"); + __iommu_group_set_domain_nofail(group, group->default_domain); } /** @@ -3253,7 +3259,7 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain, struct group_device *device; int ret = 0; - list_for_each_entry(device, &group->devices, list) { + for_each_group_device(group, device) { ret = domain->ops->set_dev_pasid(domain, device->dev, pasid); if (ret) break; @@ -3268,7 +3274,7 @@ static void __iommu_remove_group_pasid(struct iommu_group *group, struct group_device *device; const struct iommu_ops *ops; - list_for_each_entry(device, &group->devices, list) { + for_each_group_device(group, device) { ops = dev_iommu_ops(device->dev); ops->remove_dev_pasid(device->dev, pasid); } diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index fe452ce46642..10b964600948 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -647,7 +647,13 @@ struct iova_rcache { static struct iova_magazine *iova_magazine_alloc(gfp_t flags) { - return kzalloc(sizeof(struct iova_magazine), flags); + struct iova_magazine *mag; + + mag = kmalloc(sizeof(*mag), flags); + if (mag) + mag->size = 0; + + return mag; } static void iova_magazine_free(struct iova_magazine *mag) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 5b8fe9bfa9a5..3551ed057774 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -788,6 +788,29 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev) return 0; } +static void viommu_detach_dev(struct viommu_endpoint *vdev) +{ + int i; + struct virtio_iommu_req_detach req; + struct viommu_domain *vdomain = vdev->vdomain; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(vdev->dev); + + if (!vdomain) + return; + + req = (struct virtio_iommu_req_detach) { + .head.type = VIRTIO_IOMMU_T_DETACH, + .domain = cpu_to_le32(vdomain->id), + }; + + for (i = 0; i < fwspec->num_ids; i++) { + req.endpoint = cpu_to_le32(fwspec->ids[i]); + WARN_ON(viommu_send_req_sync(vdev->viommu, &req, sizeof(req))); + } + vdomain->nr_endpoints--; + vdev->vdomain = NULL; +} + static int viommu_map_pages(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -810,25 +833,26 @@ static int viommu_map_pages(struct iommu_domain *domain, unsigned long iova, if (ret) return ret; - map = (struct virtio_iommu_req_map) { - .head.type = VIRTIO_IOMMU_T_MAP, - .domain = cpu_to_le32(vdomain->id), - .virt_start = cpu_to_le64(iova), - .phys_start = cpu_to_le64(paddr), - .virt_end = cpu_to_le64(end), - .flags = cpu_to_le32(flags), - }; - - if (!vdomain->nr_endpoints) - return 0; + if (vdomain->nr_endpoints) { + map = (struct virtio_iommu_req_map) { + .head.type = VIRTIO_IOMMU_T_MAP, + .domain = cpu_to_le32(vdomain->id), + .virt_start = cpu_to_le64(iova), + .phys_start = cpu_to_le64(paddr), + .virt_end = cpu_to_le64(end), + .flags = cpu_to_le32(flags), + }; - ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map)); - if (ret) - viommu_del_mappings(vdomain, iova, end); - else if (mapped) + ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map)); + if (ret) { + viommu_del_mappings(vdomain, iova, end); + return ret; + } + } + if (mapped) *mapped = size; - return ret; + return 0; } static size_t viommu_unmap_pages(struct iommu_domain *domain, unsigned long iova, @@ -990,6 +1014,7 @@ static void viommu_release_device(struct device *dev) { struct viommu_endpoint *vdev = dev_iommu_priv_get(dev); + viommu_detach_dev(vdev); iommu_put_resv_regions(dev, &vdev->resv_regions); kfree(vdev); } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e8c9a7da1060..d31642596675 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -65,6 +65,7 @@ struct iommu_domain_geometry { #define __IOMMU_DOMAIN_SVA (1U << 4) /* Shared process address space */ +#define IOMMU_DOMAIN_ALLOC_FLAGS ~__IOMMU_DOMAIN_DMA_FQ /* * This are the possible domain-types * @@ -127,6 +128,11 @@ enum iommu_cap { * this device. */ IOMMU_CAP_ENFORCE_CACHE_COHERENCY, + /* + * IOMMU driver does not issue TLB maintenance during .unmap, so can + * usefully support the non-strict DMA flush queue. + */ + IOMMU_CAP_DEFERRED_FLUSH, }; /* These are the possible reserved region types */ |