summary | refs | log | tree | commit | diff
path: root/drivers/iommu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu')
-rw-r--r-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 60
-rw-r--r-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 80
-rw-r--r-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 36
-rw-r--r-- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 32
-rw-r--r-- drivers/iommu/dma-iommu.c | 206
-rw-r--r-- drivers/iommu/dma-iommu.h | 14
-rw-r--r-- drivers/iommu/exynos-iommu.c | 4
-rw-r--r-- drivers/iommu/intel/iommu.c | 4
-rw-r--r-- drivers/iommu/intel/irq_remapping.c | 29
-rw-r--r-- drivers/iommu/intel/nested.c | 2
-rw-r--r-- drivers/iommu/iommu-priv.h | 16
-rw-r--r-- drivers/iommu/iommu-sva.c | 1
-rw-r--r-- drivers/iommu/iommu.c | 166
-rw-r--r-- drivers/iommu/iommufd/Kconfig | 2
-rw-r--r-- drivers/iommu/iommufd/Makefile | 2
-rw-r--r-- drivers/iommu/iommufd/device.c | 499
-rw-r--r-- drivers/iommu/iommufd/driver.c | 198
-rw-r--r-- drivers/iommu/iommufd/eventq.c | 598
-rw-r--r-- drivers/iommu/iommufd/fault.c | 342
-rw-r--r-- drivers/iommu/iommufd/hw_pagetable.c | 42
-rw-r--r-- drivers/iommu/iommufd/iommufd_private.h | 156
-rw-r--r-- drivers/iommu/iommufd/iommufd_test.h | 40
-rw-r--r-- drivers/iommu/iommufd/main.c | 7
-rw-r--r-- drivers/iommu/iommufd/selftest.c | 297
-rw-r--r-- drivers/iommu/iommufd/viommu.c | 2
-rw-r--r-- drivers/iommu/ipmmu-vmsa.c | 27
-rw-r--r-- drivers/iommu/mtk_iommu.c | 26
27 files changed, 2044 insertions, 844 deletions
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
index 5aa2e7af58b4..e4fd8d522af8 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
@@ -43,6 +43,8 @@ static void arm_smmu_make_nested_cd_table_ste(
target->data[0] |= nested_domain->ste[0] &
~cpu_to_le64(STRTAB_STE_0_CFG);
target->data[1] |= nested_domain->ste[1];
+ /* Merge events for DoS mitigations on eventq */
+ target->data[1] |= cpu_to_le64(STRTAB_STE_1_MEV);
}
/*
@@ -85,6 +87,47 @@ static void arm_smmu_make_nested_domain_ste(
}
}
+int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
+ struct arm_smmu_nested_domain *nested_domain)
+{
+ struct arm_smmu_vmaster *vmaster;
+ unsigned long vsid;
+ int ret;
+
+ iommu_group_mutex_assert(state->master->dev);
+
+ ret = iommufd_viommu_get_vdev_id(&nested_domain->vsmmu->core,
+ state->master->dev, &vsid);
+ if (ret)
+ return ret;
+
+ vmaster = kzalloc(sizeof(*vmaster), GFP_KERNEL);
+ if (!vmaster)
+ return -ENOMEM;
+ vmaster->vsmmu = nested_domain->vsmmu;
+ vmaster->vsid = vsid;
+ state->vmaster = vmaster;
+
+ return 0;
+}
+
+void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state)
+{
+ struct arm_smmu_master *master = state->master;
+
+ mutex_lock(&master->smmu->streams_mutex);
+ kfree(master->vmaster);
+ master->vmaster = state->vmaster;
+ mutex_unlock(&master->smmu->streams_mutex);
+}
+
+void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master)
+{
+ struct arm_smmu_attach_state state = { .master = master };
+
+ arm_smmu_attach_commit_vmaster(&state);
+}
+
static int arm_smmu_attach_dev_nested(struct iommu_domain *domain,
struct device *dev)
{
@@ -392,4 +435,21 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
return &vsmmu->core;
}
+int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt)
+{
+ struct iommu_vevent_arm_smmuv3 vevt;
+ int i;
+
+ lockdep_assert_held(&vmaster->vsmmu->smmu->streams_mutex);
+
+ vevt.evt[0] = cpu_to_le64((evt[0] & ~EVTQ_0_SID) |
+ FIELD_PREP(EVTQ_0_SID, vmaster->vsid));
+ for (i = 1; i < EVTQ_ENT_DWORDS; i++)
+ vevt.evt[i] = cpu_to_le64(evt[i]);
+
+ return iommufd_viommu_report_event(&vmaster->vsmmu->core,
+ IOMMU_VEVENTQ_TYPE_ARM_SMMUV3, &vevt,
+ sizeof(vevt));
+}
+
MODULE_IMPORT_NS("IOMMUFD");
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 358072b4e293..b4c21aaed126 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1052,7 +1052,7 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
- STRTAB_STE_1_EATS);
+ STRTAB_STE_1_EATS | STRTAB_STE_1_MEV);
used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
/*
@@ -1068,7 +1068,7 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
if (cfg & BIT(1)) {
used_bits[1] |=
cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS |
- STRTAB_STE_1_SHCFG);
+ STRTAB_STE_1_SHCFG | STRTAB_STE_1_MEV);
used_bits[2] |=
cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
@@ -1813,8 +1813,8 @@ static void arm_smmu_decode_event(struct arm_smmu_device *smmu, u64 *raw,
mutex_unlock(&smmu->streams_mutex);
}
-static int arm_smmu_handle_event(struct arm_smmu_device *smmu,
- struct arm_smmu_event *event)
+static int arm_smmu_handle_event(struct arm_smmu_device *smmu, u64 *evt,
+ struct arm_smmu_event *event)
{
int ret = 0;
u32 perm = 0;
@@ -1823,6 +1823,10 @@ static int arm_smmu_handle_event(struct arm_smmu_device *smmu,
struct iommu_fault *flt = &fault_evt.fault;
switch (event->id) {
+ case EVT_ID_BAD_STE_CONFIG:
+ case EVT_ID_STREAM_DISABLED_FAULT:
+ case EVT_ID_BAD_SUBSTREAMID_CONFIG:
+ case EVT_ID_BAD_CD_CONFIG:
case EVT_ID_TRANSLATION_FAULT:
case EVT_ID_ADDR_SIZE_FAULT:
case EVT_ID_ACCESS_FAULT:
@@ -1832,31 +1836,30 @@ static int arm_smmu_handle_event(struct arm_smmu_device *smmu,
return -EOPNOTSUPP;
}
- if (!event->stall)
- return -EOPNOTSUPP;
-
- if (event->read)
- perm |= IOMMU_FAULT_PERM_READ;
- else
- perm |= IOMMU_FAULT_PERM_WRITE;
+ if (event->stall) {
+ if (event->read)
+ perm |= IOMMU_FAULT_PERM_READ;
+ else
+ perm |= IOMMU_FAULT_PERM_WRITE;
- if (event->instruction)
- perm |= IOMMU_FAULT_PERM_EXEC;
+ if (event->instruction)
+ perm |= IOMMU_FAULT_PERM_EXEC;
- if (event->privileged)
- perm |= IOMMU_FAULT_PERM_PRIV;
+ if (event->privileged)
+ perm |= IOMMU_FAULT_PERM_PRIV;
- flt->type = IOMMU_FAULT_PAGE_REQ;
- flt->prm = (struct iommu_fault_page_request) {
- .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
- .grpid = event->stag,
- .perm = perm,
- .addr = event->iova,
- };
+ flt->type = IOMMU_FAULT_PAGE_REQ;
+ flt->prm = (struct iommu_fault_page_request){
+ .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
+ .grpid = event->stag,
+ .perm = perm,
+ .addr = event->iova,
+ };
- if (event->ssv) {
- flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
- flt->prm.pasid = event->ssid;
+ if (event->ssv) {
+ flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+ flt->prm.pasid = event->ssid;
+ }
}
mutex_lock(&smmu->streams_mutex);
@@ -1866,7 +1869,12 @@ static int arm_smmu_handle_event(struct arm_smmu_device *smmu,
goto out_unlock;
}
- ret = iommu_report_device_fault(master->dev, &fault_evt);
+ if (event->stall)
+ ret = iommu_report_device_fault(master->dev, &fault_evt);
+ else if (master->vmaster && !event->s2)
+ ret = arm_vmaster_report_event(master->vmaster, evt);
+ else
+ ret = -EOPNOTSUPP; /* Unhandled events should be pinned */
out_unlock:
mutex_unlock(&smmu->streams_mutex);
return ret;
@@ -1944,7 +1952,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
do {
while (!queue_remove_raw(q, evt)) {
arm_smmu_decode_event(smmu, evt, &event);
- if (arm_smmu_handle_event(smmu, &event))
+ if (arm_smmu_handle_event(smmu, evt, &event))
arm_smmu_dump_event(smmu, evt, &event, &rs);
put_device(event.dev);
@@ -2803,6 +2811,7 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
struct arm_smmu_domain *smmu_domain =
to_smmu_domain_devices(new_domain);
unsigned long flags;
+ int ret;
/*
* arm_smmu_share_asid() must not see two domains pointing to the same
@@ -2832,9 +2841,18 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
}
if (smmu_domain) {
+ if (new_domain->type == IOMMU_DOMAIN_NESTED) {
+ ret = arm_smmu_attach_prepare_vmaster(
+ state, to_smmu_nested_domain(new_domain));
+ if (ret)
+ return ret;
+ }
+
master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL);
- if (!master_domain)
+ if (!master_domain) {
+ kfree(state->vmaster);
return -ENOMEM;
+ }
master_domain->master = master;
master_domain->ssid = state->ssid;
if (new_domain->type == IOMMU_DOMAIN_NESTED)
@@ -2861,6 +2879,7 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
spin_unlock_irqrestore(&smmu_domain->devices_lock,
flags);
kfree(master_domain);
+ kfree(state->vmaster);
return -EINVAL;
}
@@ -2893,6 +2912,8 @@ void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
lockdep_assert_held(&arm_smmu_asid_lock);
+ arm_smmu_attach_commit_vmaster(state);
+
if (state->ats_enabled && !master->ats_enabled) {
arm_smmu_enable_ats(master);
} else if (state->ats_enabled && master->ats_enabled) {
@@ -3162,6 +3183,7 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
struct arm_smmu_ste ste;
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ arm_smmu_master_clear_vmaster(master);
arm_smmu_make_bypass_ste(master->smmu, &ste);
arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS);
return 0;
@@ -3180,7 +3202,9 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
struct device *dev)
{
struct arm_smmu_ste ste;
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ arm_smmu_master_clear_vmaster(master);
arm_smmu_make_abort_ste(&ste);
arm_smmu_attach_dev_ste(domain, dev, &ste,
STRTAB_STE_1_S1DSS_TERMINATE);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index bd9d7c85576a..dd1ad56ce863 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -266,6 +266,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid)
#define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4)
#define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6)
+#define STRTAB_STE_1_MEV (1UL << 19)
#define STRTAB_STE_1_S2FWB (1UL << 25)
#define STRTAB_STE_1_S1STALLD (1UL << 27)
@@ -799,6 +800,11 @@ struct arm_smmu_stream {
struct rb_node node;
};
+struct arm_smmu_vmaster {
+ struct arm_vsmmu *vsmmu;
+ unsigned long vsid;
+};
+
struct arm_smmu_event {
u8 stall : 1,
ssv : 1,
@@ -824,6 +830,7 @@ struct arm_smmu_master {
struct arm_smmu_device *smmu;
struct device *dev;
struct arm_smmu_stream *streams;
+ struct arm_smmu_vmaster *vmaster; /* use smmu->streams_mutex */
/* Locked by the iommu core using the group mutex */
struct arm_smmu_ctx_desc_cfg cd_table;
unsigned int num_streams;
@@ -972,6 +979,7 @@ struct arm_smmu_attach_state {
bool disable_ats;
ioasid_t ssid;
/* Resulting state */
+ struct arm_smmu_vmaster *vmaster;
bool ats_enabled;
};
@@ -1055,9 +1063,37 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
struct iommu_domain *parent,
struct iommufd_ctx *ictx,
unsigned int viommu_type);
+int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
+ struct arm_smmu_nested_domain *nested_domain);
+void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state);
+void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master);
+int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt);
#else
#define arm_smmu_hw_info NULL
#define arm_vsmmu_alloc NULL
+
+static inline int
+arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
+ struct arm_smmu_nested_domain *nested_domain)
+{
+ return 0;
+}
+
+static inline void
+arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state)
+{
+}
+
+static inline void
+arm_smmu_master_clear_vmaster(struct arm_smmu_master *master)
+{
+}
+
+static inline int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster,
+ u64 *evt)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_ARM_SMMU_V3_IOMMUFD */
#endif /* _ARM_SMMU_V3_H */
diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
index d525ab43a4ae..dd7d030d2e89 100644
--- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
+++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
@@ -487,17 +487,6 @@ static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu)
/* VCMDQ Resource Helpers */
-static void tegra241_vcmdq_free_smmu_cmdq(struct tegra241_vcmdq *vcmdq)
-{
- struct arm_smmu_queue *q = &vcmdq->cmdq.q;
- size_t nents = 1 << q->llq.max_n_shift;
- size_t qsz = nents << CMDQ_ENT_SZ_SHIFT;
-
- if (!q->base)
- return;
- dmam_free_coherent(vcmdq->cmdqv->smmu.dev, qsz, q->base, q->base_dma);
-}
-
static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq)
{
struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu;
@@ -560,7 +549,8 @@ static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx];
char header[64];
- tegra241_vcmdq_free_smmu_cmdq(vcmdq);
+ /* Note that the lvcmdq queue memory space is managed by devres */
+
tegra241_vintf_deinit_lvcmdq(vintf, lidx);
dev_dbg(vintf->cmdqv->dev,
@@ -768,13 +758,13 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu)
vintf = kzalloc(sizeof(*vintf), GFP_KERNEL);
if (!vintf)
- goto out_fallback;
+ return -ENOMEM;
/* Init VINTF0 for in-kernel use */
ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf);
if (ret) {
dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret);
- goto free_vintf;
+ return ret;
}
/* Preallocate logical VCMDQs to VINTF0 */
@@ -783,24 +773,12 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu)
vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx);
if (IS_ERR(vcmdq))
- goto free_lvcmdq;
+ return PTR_ERR(vcmdq);
}
/* Now, we are ready to run all the impl ops */
smmu->impl_ops = &tegra241_cmdqv_impl_ops;
return 0;
-
-free_lvcmdq:
- for (lidx--; lidx >= 0; lidx--)
- tegra241_vintf_free_lvcmdq(vintf, lidx);
- tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx);
-free_vintf:
- kfree(vintf);
-out_fallback:
- dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n");
- smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV;
- tegra241_cmdqv_remove(smmu);
- return 0;
}
#ifdef CONFIG_IOMMU_DEBUGFS
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 0832998eca38..a775e4dbe06f 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -42,11 +42,6 @@ struct iommu_dma_msi_page {
phys_addr_t phys;
};
-enum iommu_dma_cookie_type {
- IOMMU_DMA_IOVA_COOKIE,
- IOMMU_DMA_MSI_COOKIE,
-};
-
enum iommu_dma_queue_type {
IOMMU_DMA_OPTS_PER_CPU_QUEUE,
IOMMU_DMA_OPTS_SINGLE_QUEUE,
@@ -59,34 +54,30 @@ struct iommu_dma_options {
};
struct iommu_dma_cookie {
- enum iommu_dma_cookie_type type;
+ struct iova_domain iovad;
+ struct list_head msi_page_list;
+ /* Flush queue */
union {
- /* Full allocator for IOMMU_DMA_IOVA_COOKIE */
- struct {
- struct iova_domain iovad;
- /* Flush queue */
- union {
- struct iova_fq *single_fq;
- struct iova_fq __percpu *percpu_fq;
- };
- /* Number of TLB flushes that have been started */
- atomic64_t fq_flush_start_cnt;
- /* Number of TLB flushes that have been finished */
- atomic64_t fq_flush_finish_cnt;
- /* Timer to regularily empty the flush queues */
- struct timer_list fq_timer;
- /* 1 when timer is active, 0 when not */
- atomic_t fq_timer_on;
- };
- /* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */
- dma_addr_t msi_iova;
+ struct iova_fq *single_fq;
+ struct iova_fq __percpu *percpu_fq;
};
- struct list_head msi_page_list;
-
+ /* Number of TLB flushes that have been started */
+ atomic64_t fq_flush_start_cnt;
+ /* Number of TLB flushes that have been finished */
+ atomic64_t fq_flush_finish_cnt;
+ /* Timer to regularily empty the flush queues */
+ struct timer_list fq_timer;
+ /* 1 when timer is active, 0 when not */
+ atomic_t fq_timer_on;
/* Domain for flush queue callback; NULL if flush queue not in use */
- struct iommu_domain *fq_domain;
+ struct iommu_domain *fq_domain;
/* Options for dma-iommu use */
- struct iommu_dma_options options;
+ struct iommu_dma_options options;
+};
+
+struct iommu_dma_msi_cookie {
+ dma_addr_t msi_iova;
+ struct list_head msi_page_list;
};
static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
@@ -102,9 +93,6 @@ static int __init iommu_dma_forcedac_setup(char *str)
}
early_param("iommu.forcedac", iommu_dma_forcedac_setup);
-static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
- phys_addr_t msi_addr);
-
/* Number of entries per flush queue */
#define IOVA_DEFAULT_FQ_SIZE 256
#define IOVA_SINGLE_FQ_SIZE 32768
@@ -283,7 +271,7 @@ static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
if (!cookie->fq_domain)
return;
- del_timer_sync(&cookie->fq_timer);
+ timer_delete_sync(&cookie->fq_timer);
if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE)
iommu_dma_free_fq_single(cookie->single_fq);
else
@@ -368,39 +356,24 @@ int iommu_dma_init_fq(struct iommu_domain *domain)
return 0;
}
-static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
-{
- if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
- return cookie->iovad.granule;
- return PAGE_SIZE;
-}
-
-static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
-{
- struct iommu_dma_cookie *cookie;
-
- cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
- if (cookie) {
- INIT_LIST_HEAD(&cookie->msi_page_list);
- cookie->type = type;
- }
- return cookie;
-}
-
/**
* iommu_get_dma_cookie - Acquire DMA-API resources for a domain
* @domain: IOMMU domain to prepare for DMA-API usage
*/
int iommu_get_dma_cookie(struct iommu_domain *domain)
{
- if (domain->iova_cookie)
+ struct iommu_dma_cookie *cookie;
+
+ if (domain->cookie_type != IOMMU_COOKIE_NONE)
return -EEXIST;
- domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE);
- if (!domain->iova_cookie)
+ cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
+ if (!cookie)
return -ENOMEM;
- iommu_domain_set_sw_msi(domain, iommu_dma_sw_msi);
+ INIT_LIST_HEAD(&cookie->msi_page_list);
+ domain->cookie_type = IOMMU_COOKIE_DMA_IOVA;
+ domain->iova_cookie = cookie;
return 0;
}
@@ -418,54 +391,56 @@ int iommu_get_dma_cookie(struct iommu_domain *domain)
*/
int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
{
- struct iommu_dma_cookie *cookie;
+ struct iommu_dma_msi_cookie *cookie;
if (domain->type != IOMMU_DOMAIN_UNMANAGED)
return -EINVAL;
- if (domain->iova_cookie)
+ if (domain->cookie_type != IOMMU_COOKIE_NONE)
return -EEXIST;
- cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
+ cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
if (!cookie)
return -ENOMEM;
cookie->msi_iova = base;
- domain->iova_cookie = cookie;
- iommu_domain_set_sw_msi(domain, iommu_dma_sw_msi);
+ INIT_LIST_HEAD(&cookie->msi_page_list);
+ domain->cookie_type = IOMMU_COOKIE_DMA_MSI;
+ domain->msi_cookie = cookie;
return 0;
}
EXPORT_SYMBOL(iommu_get_msi_cookie);
/**
* iommu_put_dma_cookie - Release a domain's DMA mapping resources
- * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or
- * iommu_get_msi_cookie()
+ * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
*/
void iommu_put_dma_cookie(struct iommu_domain *domain)
{
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iommu_dma_msi_page *msi, *tmp;
-#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
- if (domain->sw_msi != iommu_dma_sw_msi)
- return;
-#endif
-
- if (!cookie)
- return;
-
- if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) {
+ if (cookie->iovad.granule) {
iommu_dma_free_fq(cookie);
put_iova_domain(&cookie->iovad);
}
+ list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list)
+ kfree(msi);
+ kfree(cookie);
+}
+
+/**
+ * iommu_put_msi_cookie - Release a domain's MSI mapping resources
+ * @domain: IOMMU domain previously prepared by iommu_get_msi_cookie()
+ */
+void iommu_put_msi_cookie(struct iommu_domain *domain)
+{
+ struct iommu_dma_msi_cookie *cookie = domain->msi_cookie;
+ struct iommu_dma_msi_page *msi, *tmp;
- list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) {
- list_del(&msi->list);
+ list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list)
kfree(msi);
- }
kfree(cookie);
- domain->iova_cookie = NULL;
}
/**
@@ -685,7 +660,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev
struct iova_domain *iovad;
int ret;
- if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
+ if (!cookie || domain->cookie_type != IOMMU_COOKIE_DMA_IOVA)
return -EINVAL;
iovad = &cookie->iovad;
@@ -768,9 +743,9 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
struct iova_domain *iovad = &cookie->iovad;
unsigned long shift, iova_len, iova;
- if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
- cookie->msi_iova += size;
- return cookie->msi_iova - size;
+ if (domain->cookie_type == IOMMU_COOKIE_DMA_MSI) {
+ domain->msi_cookie->msi_iova += size;
+ return domain->msi_cookie->msi_iova - size;
}
shift = iova_shift(iovad);
@@ -807,16 +782,16 @@ done:
return (dma_addr_t)iova << shift;
}
-static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
- dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather)
+static void iommu_dma_free_iova(struct iommu_domain *domain, dma_addr_t iova,
+ size_t size, struct iommu_iotlb_gather *gather)
{
- struct iova_domain *iovad = &cookie->iovad;
+ struct iova_domain *iovad = &domain->iova_cookie->iovad;
/* The MSI case is only ever cleaning up its most recent allocation */
- if (cookie->type == IOMMU_DMA_MSI_COOKIE)
- cookie->msi_iova -= size;
+ if (domain->cookie_type == IOMMU_COOKIE_DMA_MSI)
+ domain->msi_cookie->msi_iova -= size;
else if (gather && gather->queued)
- queue_iova(cookie, iova_pfn(iovad, iova),
+ queue_iova(domain->iova_cookie, iova_pfn(iovad, iova),
size >> iova_shift(iovad),
&gather->freelist);
else
@@ -844,7 +819,7 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
if (!iotlb_gather.queued)
iommu_iotlb_sync(domain, &iotlb_gather);
- iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather);
+ iommu_dma_free_iova(domain, dma_addr, size, &iotlb_gather);
}
static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
@@ -872,7 +847,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
return DMA_MAPPING_ERROR;
if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) {
- iommu_dma_free_iova(cookie, iova, size, NULL);
+ iommu_dma_free_iova(domain, iova, size, NULL);
return DMA_MAPPING_ERROR;
}
return iova + iova_off;
@@ -1009,7 +984,7 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
out_free_sg:
sg_free_table(sgt);
out_free_iova:
- iommu_dma_free_iova(cookie, iova, size, NULL);
+ iommu_dma_free_iova(domain, iova, size, NULL);
out_free_pages:
__iommu_dma_free_pages(pages, count);
return NULL;
@@ -1486,7 +1461,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
return __finalise_sg(dev, sg, nents, iova);
out_free_iova:
- iommu_dma_free_iova(cookie, iova, iova_len, NULL);
+ iommu_dma_free_iova(domain, iova, iova_len, NULL);
out_restore_sg:
__invalidate_sg(sg, nents);
out:
@@ -1764,17 +1739,47 @@ out_err:
dev->dma_iommu = false;
}
+static bool has_msi_cookie(const struct iommu_domain *domain)
+{
+ return domain && (domain->cookie_type == IOMMU_COOKIE_DMA_IOVA ||
+ domain->cookie_type == IOMMU_COOKIE_DMA_MSI);
+}
+
+static size_t cookie_msi_granule(const struct iommu_domain *domain)
+{
+ switch (domain->cookie_type) {
+ case IOMMU_COOKIE_DMA_IOVA:
+ return domain->iova_cookie->iovad.granule;
+ case IOMMU_COOKIE_DMA_MSI:
+ return PAGE_SIZE;
+ default:
+ BUG();
+ }
+}
+
+static struct list_head *cookie_msi_pages(const struct iommu_domain *domain)
+{
+ switch (domain->cookie_type) {
+ case IOMMU_COOKIE_DMA_IOVA:
+ return &domain->iova_cookie->msi_page_list;
+ case IOMMU_COOKIE_DMA_MSI:
+ return &domain->msi_cookie->msi_page_list;
+ default:
+ BUG();
+ }
+}
+
static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
phys_addr_t msi_addr, struct iommu_domain *domain)
{
- struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct list_head *msi_page_list = cookie_msi_pages(domain);
struct iommu_dma_msi_page *msi_page;
dma_addr_t iova;
int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
- size_t size = cookie_msi_granule(cookie);
+ size_t size = cookie_msi_granule(domain);
msi_addr &= ~(phys_addr_t)(size - 1);
- list_for_each_entry(msi_page, &cookie->msi_page_list, list)
+ list_for_each_entry(msi_page, msi_page_list, list)
if (msi_page->phys == msi_addr)
return msi_page;
@@ -1792,23 +1797,23 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
INIT_LIST_HEAD(&msi_page->list);
msi_page->phys = msi_addr;
msi_page->iova = iova;
- list_add(&msi_page->list, &cookie->msi_page_list);
+ list_add(&msi_page->list, msi_page_list);
return msi_page;
out_free_iova:
- iommu_dma_free_iova(cookie, iova, size, NULL);
+ iommu_dma_free_iova(domain, iova, size, NULL);
out_free_page:
kfree(msi_page);
return NULL;
}
-static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
- phys_addr_t msi_addr)
+int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
+ phys_addr_t msi_addr)
{
struct device *dev = msi_desc_to_dev(desc);
const struct iommu_dma_msi_page *msi_page;
- if (!domain->iova_cookie) {
+ if (!has_msi_cookie(domain)) {
msi_desc_set_iommu_msi_iova(desc, 0, 0);
return 0;
}
@@ -1818,9 +1823,8 @@ static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
if (!msi_page)
return -ENOMEM;
- msi_desc_set_iommu_msi_iova(
- desc, msi_page->iova,
- ilog2(cookie_msi_granule(domain->iova_cookie)));
+ msi_desc_set_iommu_msi_iova(desc, msi_page->iova,
+ ilog2(cookie_msi_granule(domain)));
return 0;
}
diff --git a/drivers/iommu/dma-iommu.h b/drivers/iommu/dma-iommu.h
index c12d63457c76..eca201c1f963 100644
--- a/drivers/iommu/dma-iommu.h
+++ b/drivers/iommu/dma-iommu.h
@@ -13,11 +13,15 @@ void iommu_setup_dma_ops(struct device *dev);
int iommu_get_dma_cookie(struct iommu_domain *domain);
void iommu_put_dma_cookie(struct iommu_domain *domain);
+void iommu_put_msi_cookie(struct iommu_domain *domain);
int iommu_dma_init_fq(struct iommu_domain *domain);
void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
+int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
+ phys_addr_t msi_addr);
+
extern bool iommu_dma_forcedac;
#else /* CONFIG_IOMMU_DMA */
@@ -40,9 +44,19 @@ static inline void iommu_put_dma_cookie(struct iommu_domain *domain)
{
}
+static inline void iommu_put_msi_cookie(struct iommu_domain *domain)
+{
+}
+
static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
{
}
+static inline int iommu_dma_sw_msi(struct iommu_domain *domain,
+ struct msi_desc *desc, phys_addr_t msi_addr)
+{
+ return -ENODEV;
+}
+
#endif /* CONFIG_IOMMU_DMA */
#endif /* __DMA_IOMMU_H */
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 69e23e017d9e..317266aca6e2 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -832,7 +832,7 @@ static int __maybe_unused exynos_sysmmu_suspend(struct device *dev)
struct exynos_iommu_owner *owner = dev_iommu_priv_get(master);
mutex_lock(&owner->rpm_lock);
- if (&data->domain->domain != &exynos_identity_domain) {
+ if (data->domain) {
dev_dbg(data->sysmmu, "saving state\n");
__sysmmu_disable(data);
}
@@ -850,7 +850,7 @@ static int __maybe_unused exynos_sysmmu_resume(struct device *dev)
struct exynos_iommu_owner *owner = dev_iommu_priv_get(master);
mutex_lock(&owner->rpm_lock);
- if (&data->domain->domain != &exynos_identity_domain) {
+ if (data->domain) {
dev_dbg(data->sysmmu, "restoring state\n");
__sysmmu_enable(data);
}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index ec2f385ae25b..b29da2d96d0b 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3383,7 +3383,8 @@ intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
bool first_stage;
if (flags &
- (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING)))
+ (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
+ IOMMU_HWPT_ALLOC_PASID)))
return ERR_PTR(-EOPNOTSUPP);
if (nested_parent && !nested_supported(iommu))
return ERR_PTR(-EOPNOTSUPP);
@@ -3834,7 +3835,6 @@ static void intel_iommu_release_device(struct device *dev)
intel_pasid_free_table(dev);
intel_iommu_debugfs_remove_dev(info);
kfree(info);
- set_dma_ops(dev, NULL);
}
static void intel_iommu_get_resv_regions(struct device *device,
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index ea3ca5203919..3bc2a03cceca 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -1287,43 +1287,44 @@ static struct irq_chip intel_ir_chip = {
};
/*
- * With posted MSIs, all vectors are multiplexed into a single notification
- * vector. Devices MSIs are then dispatched in a demux loop where
- * EOIs can be coalesced as well.
+ * With posted MSIs, the MSI vectors are multiplexed into a single notification
+ * vector, and only the notification vector is sent to the APIC IRR. Device
+ * MSIs are then dispatched in a demux loop that harvests the MSIs from the
+ * CPU's Posted Interrupt Request bitmap. I.e. Posted MSIs never get sent to
+ * the APIC IRR, and thus do not need an EOI. The notification handler instead
+ * performs a single EOI after processing the PIR.
*
- * "INTEL-IR-POST" IRQ chip does not do EOI on ACK, thus the dummy irq_ack()
- * function. Instead EOI is performed by the posted interrupt notification
- * handler.
+ * Note! Pending SMP/CPU affinity changes, which are per MSI, must still be
+ * honored, only the APIC EOI is omitted.
*
* For the example below, 3 MSIs are coalesced into one CPU notification. Only
- * one apic_eoi() is needed.
+ * one apic_eoi() is needed, but each MSI needs to process pending changes to
+ * its CPU affinity.
*
* __sysvec_posted_msi_notification()
* irq_enter();
* handle_edge_irq()
* irq_chip_ack_parent()
- * dummy(); // No EOI
+ * irq_move_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* handle_edge_irq()
* irq_chip_ack_parent()
- * dummy(); // No EOI
+ * irq_move_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* handle_edge_irq()
* irq_chip_ack_parent()
- * dummy(); // No EOI
+ * irq_move_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* apic_eoi()
* irq_exit()
+ *
*/
-
-static void dummy_ack(struct irq_data *d) { }
-
static struct irq_chip intel_ir_chip_post_msi = {
.name = "INTEL-IR-POST",
- .irq_ack = dummy_ack,
+ .irq_ack = irq_move_irq,
.irq_set_affinity = intel_ir_set_affinity,
.irq_compose_msi_msg = intel_ir_compose_msi_msg,
.irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index aba92c00b427..6ac5c534bef4 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -198,7 +198,7 @@ intel_iommu_domain_alloc_nested(struct device *dev, struct iommu_domain *parent,
struct dmar_domain *domain;
int ret;
- if (!nested_supported(iommu) || flags)
+ if (!nested_supported(iommu) || flags & ~IOMMU_HWPT_ALLOC_PASID)
return ERR_PTR(-EOPNOTSUPP);
/* Must be nested domain */
diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h
index 05fa6e682e88..e236b932e766 100644
--- a/drivers/iommu/iommu-priv.h
+++ b/drivers/iommu/iommu-priv.h
@@ -5,6 +5,7 @@
#define __LINUX_IOMMU_PRIV_H
#include <linux/iommu.h>
+#include <linux/msi.h>
static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
{
@@ -47,4 +48,19 @@ void iommu_detach_group_handle(struct iommu_domain *domain,
int iommu_replace_group_handle(struct iommu_group *group,
struct iommu_domain *new_domain,
struct iommu_attach_handle *handle);
+
+#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER_CORE) && IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
+int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
+ phys_addr_t msi_addr);
+#else /* !CONFIG_IOMMUFD_DRIVER_CORE || !CONFIG_IRQ_MSI_IOMMU */
+static inline int iommufd_sw_msi(struct iommu_domain *domain,
+ struct msi_desc *desc, phys_addr_t msi_addr)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_IOMMUFD_DRIVER_CORE && CONFIG_IRQ_MSI_IOMMU */
+
+int iommu_replace_device_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_attach_handle *handle);
#endif /* __LINUX_IOMMU_PRIV_H */
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index 503c5d23c1ea..ab18bc494eef 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -310,6 +310,7 @@ static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
}
domain->type = IOMMU_DOMAIN_SVA;
+ domain->cookie_type = IOMMU_COOKIE_SVA;
mmgrab(mm);
domain->mm = mm;
domain->owner = ops;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 9e1b444246f8..4f91a740c15f 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -18,6 +18,7 @@
#include <linux/errno.h>
#include <linux/host1x_context_bus.h>
#include <linux/iommu.h>
+#include <linux/iommufd.h>
#include <linux/idr.h>
#include <linux/err.h>
#include <linux/pci.h>
@@ -537,6 +538,16 @@ static void iommu_deinit_device(struct device *dev)
dev->iommu_group = NULL;
module_put(ops->owner);
dev_iommu_free(dev);
+#ifdef CONFIG_IOMMU_DMA
+ dev->dma_iommu = false;
+#endif
+}
+
+static struct iommu_domain *pasid_array_entry_to_domain(void *entry)
+{
+ if (xa_pointer_tag(entry) == IOMMU_PASID_ARRAY_DOMAIN)
+ return xa_untag_pointer(entry);
+ return ((struct iommu_attach_handle *)xa_untag_pointer(entry))->domain;
}
DEFINE_MUTEX(iommu_probe_device_lock);
@@ -1973,8 +1984,10 @@ void iommu_set_fault_handler(struct iommu_domain *domain,
iommu_fault_handler_t handler,
void *token)
{
- BUG_ON(!domain);
+ if (WARN_ON(!domain || domain->cookie_type != IOMMU_COOKIE_NONE))
+ return;
+ domain->cookie_type = IOMMU_COOKIE_FAULT_HANDLER;
domain->handler = handler;
domain->handler_token = token;
}
@@ -2044,9 +2057,19 @@ EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc_flags);
void iommu_domain_free(struct iommu_domain *domain)
{
- if (domain->type == IOMMU_DOMAIN_SVA)
+ switch (domain->cookie_type) {
+ case IOMMU_COOKIE_DMA_IOVA:
+ iommu_put_dma_cookie(domain);
+ break;
+ case IOMMU_COOKIE_DMA_MSI:
+ iommu_put_msi_cookie(domain);
+ break;
+ case IOMMU_COOKIE_SVA:
mmdrop(domain->mm);
- iommu_put_dma_cookie(domain);
+ break;
+ default:
+ break;
+ }
if (domain->ops->free)
domain->ops->free(domain);
}
@@ -2697,7 +2720,8 @@ int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
* if upper layers showed interest and installed a fault handler,
* invoke it.
*/
- if (domain->handler)
+ if (domain->cookie_type == IOMMU_COOKIE_FAULT_HANDLER &&
+ domain->handler)
ret = domain->handler(domain, dev, iova, flags,
domain->handler_token);
@@ -3335,14 +3359,15 @@ static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
}
static int __iommu_set_group_pasid(struct iommu_domain *domain,
- struct iommu_group *group, ioasid_t pasid)
+ struct iommu_group *group, ioasid_t pasid,
+ struct iommu_domain *old)
{
struct group_device *device, *last_gdev;
int ret;
for_each_group_device(group, device) {
ret = domain->ops->set_dev_pasid(domain, device->dev,
- pasid, NULL);
+ pasid, old);
if (ret)
goto err_revert;
}
@@ -3354,7 +3379,15 @@ err_revert:
for_each_group_device(group, device) {
if (device == last_gdev)
break;
- iommu_remove_dev_pasid(device->dev, pasid, domain);
+ /*
+ * If there is no old domain, undo the attachment of the
+ * devices/pasid that succeeded. Otherwise, roll those
+ * devices/pasid back to the old domain. It is a driver bug to
+ * fail attaching with a previously good domain.
+ */
+ if (!old || WARN_ON(old->ops->set_dev_pasid(old, device->dev,
+ pasid, domain)))
+ iommu_remove_dev_pasid(device->dev, pasid, domain);
}
return ret;
}
@@ -3376,6 +3409,9 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
* @pasid: the pasid of the device.
* @handle: the attach handle.
*
+ * If the caller passes a valid handle, it must always be a new one, to avoid
+ * racing with paths that hold a lockless reference to the handle.
+ *
* Return: 0 on success, or an error.
*/
int iommu_attach_device_pasid(struct iommu_domain *domain,
@@ -3420,7 +3456,7 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
if (ret)
goto out_unlock;
- ret = __iommu_set_group_pasid(domain, group, pasid);
+ ret = __iommu_set_group_pasid(domain, group, pasid, NULL);
if (ret) {
xa_release(&group->pasid_array, pasid);
goto out_unlock;
@@ -3441,6 +3477,97 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(iommu_attach_device_pasid);
+/**
+ * iommu_replace_device_pasid - Replace the domain that a specific pasid
+ * of the device is attached to
+ * @domain: the new iommu domain
+ * @dev: the attached device.
+ * @pasid: the pasid of the device.
+ * @handle: the attach handle.
+ *
+ * This API allows the pasid to switch domains. The @pasid should have been
+ * attached. Otherwise, this fails. The pasid will keep the old configuration
+ * if replacement failed.
+ *
+ * If the caller passes a valid handle, it must always be a new one, to avoid
+ * racing with paths that hold a lockless reference to the handle.
+ *
+ * Return: 0 on success, or an error.
+ */
+int iommu_replace_device_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_attach_handle *handle)
+{
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
+ struct iommu_attach_handle *entry;
+ struct iommu_domain *curr_domain;
+ void *curr;
+ int ret;
+
+ if (!group)
+ return -ENODEV;
+
+ if (!domain->ops->set_dev_pasid)
+ return -EOPNOTSUPP;
+
+ if (dev_iommu_ops(dev) != domain->owner ||
+ pasid == IOMMU_NO_PASID || !handle)
+ return -EINVAL;
+
+ mutex_lock(&group->mutex);
+ entry = iommu_make_pasid_array_entry(domain, handle);
+ curr = xa_cmpxchg(&group->pasid_array, pasid, NULL,
+ XA_ZERO_ENTRY, GFP_KERNEL);
+ if (xa_is_err(curr)) {
+ ret = xa_err(curr);
+ goto out_unlock;
+ }
+
+ /*
+ * No domain (with or without handle) attached, hence not
+ * a replace case.
+ */
+ if (!curr) {
+ xa_release(&group->pasid_array, pasid);
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ /*
+ * Reusing a handle is problematic as there are paths that refer to
+ * the handle without holding a lock. To avoid races, reject callers
+ * that attempt it.
+ */
+ if (curr == entry) {
+ WARN_ON(1);
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ curr_domain = pasid_array_entry_to_domain(curr);
+ ret = 0;
+
+ if (curr_domain != domain) {
+ ret = __iommu_set_group_pasid(domain, group,
+ pasid, curr_domain);
+ if (ret)
+ goto out_unlock;
+ }
+
+ /*
+ * The above xa_cmpxchg() reserved the memory, and the
+ * group->mutex is held, this cannot fail.
+ */
+ WARN_ON(xa_is_err(xa_store(&group->pasid_array,
+ pasid, entry, GFP_KERNEL)));
+
+out_unlock:
+ mutex_unlock(&group->mutex);
+ return ret;
+}
+EXPORT_SYMBOL_NS_GPL(iommu_replace_device_pasid, "IOMMUFD_INTERNAL");
+
/*
* iommu_detach_device_pasid() - Detach the domain from pasid of device
* @domain: the iommu domain.
@@ -3536,6 +3663,9 @@ EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, "IOMMUFD_INTERNAL");
* This is a variant of iommu_attach_group(). It allows the caller to provide
* an attach handle and use it when the domain is attached. This is currently
* used by IOMMUFD to deliver the I/O page faults.
+ *
+ * The caller should always provide a new handle to avoid racing with paths
+ * that hold a lockless reference to the handle.
*/
int iommu_attach_group_handle(struct iommu_domain *domain,
struct iommu_group *group,
@@ -3605,6 +3735,9 @@ EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, "IOMMUFD_INTERNAL");
*
* If the currently attached domain is a core domain (e.g. a default_domain),
* it will act just like the iommu_attach_group_handle().
+ *
+ * The caller should always provide a new handle to avoid racing with paths
+ * that hold a lockless reference to the handle.
*/
int iommu_replace_group_handle(struct iommu_group *group,
struct iommu_domain *new_domain,
@@ -3662,8 +3795,21 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
return 0;
mutex_lock(&group->mutex);
- if (group->domain && group->domain->sw_msi)
- ret = group->domain->sw_msi(group->domain, desc, msi_addr);
+ /* An IDENTITY domain must pass through */
+ if (group->domain && group->domain->type != IOMMU_DOMAIN_IDENTITY) {
+ switch (group->domain->cookie_type) {
+ case IOMMU_COOKIE_DMA_MSI:
+ case IOMMU_COOKIE_DMA_IOVA:
+ ret = iommu_dma_sw_msi(group->domain, desc, msi_addr);
+ break;
+ case IOMMU_COOKIE_IOMMUFD:
+ ret = iommufd_sw_msi(group->domain, desc, msi_addr);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ }
mutex_unlock(&group->mutex);
return ret;
}
diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig
index 0a07f9449fd9..2beeb4f60ee5 100644
--- a/drivers/iommu/iommufd/Kconfig
+++ b/drivers/iommu/iommufd/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
config IOMMUFD_DRIVER_CORE
- tristate
+ bool
default (IOMMUFD_DRIVER || IOMMUFD) if IOMMUFD!=n
config IOMMUFD
diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index cb784da6cddc..71d692c9a8f4 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
iommufd-y := \
device.o \
- fault.o \
+ eventq.o \
hw_pagetable.o \
io_pagetable.o \
ioas.o \
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 4e107f69f951..2111bad72c72 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -3,9 +3,9 @@
*/
#include <linux/iommu.h>
#include <linux/iommufd.h>
+#include <linux/pci-ats.h>
#include <linux/slab.h>
#include <uapi/linux/iommufd.h>
-#include <linux/msi.h>
#include "../iommu-priv.h"
#include "io_pagetable.h"
@@ -18,12 +18,17 @@ MODULE_PARM_DESC(
"Allow IOMMUFD to bind to devices even if the platform cannot isolate "
"the MSI interrupt window. Enabling this is a security weakness.");
+struct iommufd_attach {
+ struct iommufd_hw_pagetable *hwpt;
+ struct xarray device_array;
+};
+
static void iommufd_group_release(struct kref *kref)
{
struct iommufd_group *igroup =
container_of(kref, struct iommufd_group, ref);
- WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list));
+ WARN_ON(!xa_empty(&igroup->pasid_attach));
xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup,
NULL, GFP_KERNEL);
@@ -90,7 +95,7 @@ static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
kref_init(&new_igroup->ref);
mutex_init(&new_igroup->lock);
- INIT_LIST_HEAD(&new_igroup->device_list);
+ xa_init(&new_igroup->pasid_attach);
new_igroup->sw_msi_start = PHYS_ADDR_MAX;
/* group reference moves into new_igroup */
new_igroup->group = group;
@@ -294,129 +299,24 @@ u32 iommufd_device_to_id(struct iommufd_device *idev)
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD");
-/*
- * Get a iommufd_sw_msi_map for the msi physical address requested by the irq
- * layer. The mapping to IOVA is global to the iommufd file descriptor, every
- * domain that is attached to a device using the same MSI parameters will use
- * the same IOVA.
- */
-static __maybe_unused struct iommufd_sw_msi_map *
-iommufd_sw_msi_get_map(struct iommufd_ctx *ictx, phys_addr_t msi_addr,
- phys_addr_t sw_msi_start)
-{
- struct iommufd_sw_msi_map *cur;
- unsigned int max_pgoff = 0;
-
- lockdep_assert_held(&ictx->sw_msi_lock);
-
- list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) {
- if (cur->sw_msi_start != sw_msi_start)
- continue;
- max_pgoff = max(max_pgoff, cur->pgoff + 1);
- if (cur->msi_addr == msi_addr)
- return cur;
- }
-
- if (ictx->sw_msi_id >=
- BITS_PER_BYTE * sizeof_field(struct iommufd_sw_msi_maps, bitmap))
- return ERR_PTR(-EOVERFLOW);
-
- cur = kzalloc(sizeof(*cur), GFP_KERNEL);
- if (!cur)
- return ERR_PTR(-ENOMEM);
-
- cur->sw_msi_start = sw_msi_start;
- cur->msi_addr = msi_addr;
- cur->pgoff = max_pgoff;
- cur->id = ictx->sw_msi_id++;
- list_add_tail(&cur->sw_msi_item, &ictx->sw_msi_list);
- return cur;
-}
-
-static int iommufd_sw_msi_install(struct iommufd_ctx *ictx,
- struct iommufd_hwpt_paging *hwpt_paging,
- struct iommufd_sw_msi_map *msi_map)
+static unsigned int iommufd_group_device_num(struct iommufd_group *igroup,
+ ioasid_t pasid)
{
- unsigned long iova;
-
- lockdep_assert_held(&ictx->sw_msi_lock);
+ struct iommufd_attach *attach;
+ struct iommufd_device *idev;
+ unsigned int count = 0;
+ unsigned long index;
- iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE;
- if (!test_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap)) {
- int rc;
+ lockdep_assert_held(&igroup->lock);
- rc = iommu_map(hwpt_paging->common.domain, iova,
- msi_map->msi_addr, PAGE_SIZE,
- IOMMU_WRITE | IOMMU_READ | IOMMU_MMIO,
- GFP_KERNEL_ACCOUNT);
- if (rc)
- return rc;
- __set_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap);
- }
- return 0;
+ attach = xa_load(&igroup->pasid_attach, pasid);
+ if (attach)
+ xa_for_each(&attach->device_array, index, idev)
+ count++;
+ return count;
}
-/*
- * Called by the irq code if the platform translates the MSI address through the
- * IOMMU. msi_addr is the physical address of the MSI page. iommufd will
- * allocate a fd global iova for the physical page that is the same on all
- * domains and devices.
- */
#ifdef CONFIG_IRQ_MSI_IOMMU
-int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
- phys_addr_t msi_addr)
-{
- struct device *dev = msi_desc_to_dev(desc);
- struct iommufd_hwpt_paging *hwpt_paging;
- struct iommu_attach_handle *raw_handle;
- struct iommufd_attach_handle *handle;
- struct iommufd_sw_msi_map *msi_map;
- struct iommufd_ctx *ictx;
- unsigned long iova;
- int rc;
-
- /*
- * It is safe to call iommu_attach_handle_get() here because the iommu
- * core code invokes this under the group mutex which also prevents any
- * change of the attach handle for the duration of this function.
- */
- iommu_group_mutex_assert(dev);
-
- raw_handle =
- iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0);
- if (IS_ERR(raw_handle))
- return 0;
- hwpt_paging = find_hwpt_paging(domain->iommufd_hwpt);
-
- handle = to_iommufd_handle(raw_handle);
- /* No IOMMU_RESV_SW_MSI means no change to the msi_msg */
- if (handle->idev->igroup->sw_msi_start == PHYS_ADDR_MAX)
- return 0;
-
- ictx = handle->idev->ictx;
- guard(mutex)(&ictx->sw_msi_lock);
- /*
- * The input msi_addr is the exact byte offset of the MSI doorbell, we
- * assume the caller has checked that it is contained with a MMIO region
- * that is secure to map at PAGE_SIZE.
- */
- msi_map = iommufd_sw_msi_get_map(handle->idev->ictx,
- msi_addr & PAGE_MASK,
- handle->idev->igroup->sw_msi_start);
- if (IS_ERR(msi_map))
- return PTR_ERR(msi_map);
-
- rc = iommufd_sw_msi_install(ictx, hwpt_paging, msi_map);
- if (rc)
- return rc;
- __set_bit(msi_map->id, handle->idev->igroup->required_sw_msi.bitmap);
-
- iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE;
- msi_desc_set_iommu_msi_iova(desc, iova, PAGE_SHIFT);
- return 0;
-}
-#endif
-
static int iommufd_group_setup_msi(struct iommufd_group *igroup,
struct iommufd_hwpt_paging *hwpt_paging)
{
@@ -443,23 +343,39 @@ static int iommufd_group_setup_msi(struct iommufd_group *igroup,
}
return 0;
}
+#else
+static inline int
+iommufd_group_setup_msi(struct iommufd_group *igroup,
+ struct iommufd_hwpt_paging *hwpt_paging)
+{
+ return 0;
+}
+#endif
+
+static bool
+iommufd_group_first_attach(struct iommufd_group *igroup, ioasid_t pasid)
+{
+ lockdep_assert_held(&igroup->lock);
+ return !xa_load(&igroup->pasid_attach, pasid);
+}
static int
iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
struct iommufd_hwpt_paging *hwpt_paging)
{
+ struct iommufd_group *igroup = idev->igroup;
int rc;
- lockdep_assert_held(&idev->igroup->lock);
+ lockdep_assert_held(&igroup->lock);
rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt,
idev->dev,
- &idev->igroup->sw_msi_start);
+ &igroup->sw_msi_start);
if (rc)
return rc;
- if (list_empty(&idev->igroup->device_list)) {
- rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging);
+ if (iommufd_group_first_attach(igroup, IOMMU_NO_PASID)) {
+ rc = iommufd_group_setup_msi(igroup, hwpt_paging);
if (rc) {
iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt,
idev->dev);
@@ -471,13 +387,54 @@ iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
/* The device attach/detach/replace helpers for attach_handle */
+static bool iommufd_device_is_attached(struct iommufd_device *idev,
+ ioasid_t pasid)
+{
+ struct iommufd_attach *attach;
+
+ attach = xa_load(&idev->igroup->pasid_attach, pasid);
+ return xa_load(&attach->device_array, idev->obj.id);
+}
+
+static int iommufd_hwpt_pasid_compat(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev,
+ ioasid_t pasid)
+{
+ struct iommufd_group *igroup = idev->igroup;
+
+ lockdep_assert_held(&igroup->lock);
+
+ if (pasid == IOMMU_NO_PASID) {
+ unsigned long start = IOMMU_NO_PASID;
+
+ if (!hwpt->pasid_compat &&
+ xa_find_after(&igroup->pasid_attach,
+ &start, UINT_MAX, XA_PRESENT))
+ return -EINVAL;
+ } else {
+ struct iommufd_attach *attach;
+
+ if (!hwpt->pasid_compat)
+ return -EINVAL;
+
+ attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
+ if (attach && attach->hwpt && !attach->hwpt->pasid_compat)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
+ struct iommufd_device *idev,
+ ioasid_t pasid)
{
struct iommufd_attach_handle *handle;
int rc;
- lockdep_assert_held(&idev->igroup->lock);
+ rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid);
+ if (rc)
+ return rc;
handle = kzalloc(sizeof(*handle), GFP_KERNEL);
if (!handle)
@@ -490,8 +447,12 @@ static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
}
handle->idev = idev;
- rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group,
- &handle->handle);
+ if (pasid == IOMMU_NO_PASID)
+ rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group,
+ &handle->handle);
+ else
+ rc = iommu_attach_device_pasid(hwpt->domain, idev->dev, pasid,
+ &handle->handle);
if (rc)
goto out_disable_iopf;
@@ -506,26 +467,31 @@ out_free_handle:
}
static struct iommufd_attach_handle *
-iommufd_device_get_attach_handle(struct iommufd_device *idev)
+iommufd_device_get_attach_handle(struct iommufd_device *idev, ioasid_t pasid)
{
struct iommu_attach_handle *handle;
lockdep_assert_held(&idev->igroup->lock);
handle =
- iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0);
+ iommu_attach_handle_get(idev->igroup->group, pasid, 0);
if (IS_ERR(handle))
return NULL;
return to_iommufd_handle(handle);
}
static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
+ struct iommufd_device *idev,
+ ioasid_t pasid)
{
struct iommufd_attach_handle *handle;
- handle = iommufd_device_get_attach_handle(idev);
- iommu_detach_group_handle(hwpt->domain, idev->igroup->group);
+ handle = iommufd_device_get_attach_handle(idev, pasid);
+ if (pasid == IOMMU_NO_PASID)
+ iommu_detach_group_handle(hwpt->domain, idev->igroup->group);
+ else
+ iommu_detach_device_pasid(hwpt->domain, idev->dev, pasid);
+
if (hwpt->fault) {
iommufd_auto_response_faults(hwpt, handle);
iommufd_fault_iopf_disable(idev);
@@ -534,13 +500,19 @@ static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
}
static int iommufd_hwpt_replace_device(struct iommufd_device *idev,
+ ioasid_t pasid,
struct iommufd_hw_pagetable *hwpt,
struct iommufd_hw_pagetable *old)
{
- struct iommufd_attach_handle *handle, *old_handle =
- iommufd_device_get_attach_handle(idev);
+ struct iommufd_attach_handle *handle, *old_handle;
int rc;
+ rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid);
+ if (rc)
+ return rc;
+
+ old_handle = iommufd_device_get_attach_handle(idev, pasid);
+
handle = kzalloc(sizeof(*handle), GFP_KERNEL);
if (!handle)
return -ENOMEM;
@@ -552,8 +524,12 @@ static int iommufd_hwpt_replace_device(struct iommufd_device *idev,
}
handle->idev = idev;
- rc = iommu_replace_group_handle(idev->igroup->group, hwpt->domain,
- &handle->handle);
+ if (pasid == IOMMU_NO_PASID)
+ rc = iommu_replace_group_handle(idev->igroup->group,
+ hwpt->domain, &handle->handle);
+ else
+ rc = iommu_replace_device_pasid(hwpt->domain, idev->dev,
+ pasid, &handle->handle);
if (rc)
goto out_disable_iopf;
@@ -575,22 +551,51 @@ out_free_handle:
}
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
+ struct iommufd_device *idev, ioasid_t pasid)
{
struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
+ bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID;
+ struct iommufd_group *igroup = idev->igroup;
+ struct iommufd_hw_pagetable *old_hwpt;
+ struct iommufd_attach *attach;
int rc;
- mutex_lock(&idev->igroup->lock);
+ mutex_lock(&igroup->lock);
- if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) {
- rc = -EINVAL;
+ attach = xa_cmpxchg(&igroup->pasid_attach, pasid, NULL,
+ XA_ZERO_ENTRY, GFP_KERNEL);
+ if (xa_is_err(attach)) {
+ rc = xa_err(attach);
goto err_unlock;
}
- if (hwpt_paging) {
+ if (!attach) {
+ attach = kzalloc(sizeof(*attach), GFP_KERNEL);
+ if (!attach) {
+ rc = -ENOMEM;
+ goto err_release_pasid;
+ }
+ xa_init(&attach->device_array);
+ }
+
+ old_hwpt = attach->hwpt;
+
+ rc = xa_insert(&attach->device_array, idev->obj.id, XA_ZERO_ENTRY,
+ GFP_KERNEL);
+ if (rc) {
+ WARN_ON(rc == -EBUSY && !old_hwpt);
+ goto err_free_attach;
+ }
+
+ if (old_hwpt && old_hwpt != hwpt) {
+ rc = -EINVAL;
+ goto err_release_devid;
+ }
+
+ if (attach_resv) {
rc = iommufd_device_attach_reserved_iova(idev, hwpt_paging);
if (rc)
- goto err_unlock;
+ goto err_release_devid;
}
/*
@@ -600,51 +605,74 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
* reserved regions are only updated during individual device
* attachment.
*/
- if (list_empty(&idev->igroup->device_list)) {
- rc = iommufd_hwpt_attach_device(hwpt, idev);
+ if (iommufd_group_first_attach(igroup, pasid)) {
+ rc = iommufd_hwpt_attach_device(hwpt, idev, pasid);
if (rc)
goto err_unresv;
- idev->igroup->hwpt = hwpt;
+ attach->hwpt = hwpt;
+ WARN_ON(xa_is_err(xa_store(&igroup->pasid_attach, pasid, attach,
+ GFP_KERNEL)));
}
refcount_inc(&hwpt->obj.users);
- list_add_tail(&idev->group_item, &idev->igroup->device_list);
- mutex_unlock(&idev->igroup->lock);
+ WARN_ON(xa_is_err(xa_store(&attach->device_array, idev->obj.id,
+ idev, GFP_KERNEL)));
+ mutex_unlock(&igroup->lock);
return 0;
err_unresv:
- if (hwpt_paging)
+ if (attach_resv)
iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
+err_release_devid:
+ xa_release(&attach->device_array, idev->obj.id);
+err_free_attach:
+ if (iommufd_group_first_attach(igroup, pasid))
+ kfree(attach);
+err_release_pasid:
+ if (iommufd_group_first_attach(igroup, pasid))
+ xa_release(&igroup->pasid_attach, pasid);
err_unlock:
- mutex_unlock(&idev->igroup->lock);
+ mutex_unlock(&igroup->lock);
return rc;
}
struct iommufd_hw_pagetable *
-iommufd_hw_pagetable_detach(struct iommufd_device *idev)
+iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid)
{
- struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt;
- struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
+ struct iommufd_group *igroup = idev->igroup;
+ struct iommufd_hwpt_paging *hwpt_paging;
+ struct iommufd_hw_pagetable *hwpt;
+ struct iommufd_attach *attach;
+
+ mutex_lock(&igroup->lock);
+ attach = xa_load(&igroup->pasid_attach, pasid);
+ if (!attach) {
+ mutex_unlock(&igroup->lock);
+ return NULL;
+ }
- mutex_lock(&idev->igroup->lock);
- list_del(&idev->group_item);
- if (list_empty(&idev->igroup->device_list)) {
- iommufd_hwpt_detach_device(hwpt, idev);
- idev->igroup->hwpt = NULL;
+ hwpt = attach->hwpt;
+ hwpt_paging = find_hwpt_paging(hwpt);
+
+ xa_erase(&attach->device_array, idev->obj.id);
+ if (xa_empty(&attach->device_array)) {
+ iommufd_hwpt_detach_device(hwpt, idev, pasid);
+ xa_erase(&igroup->pasid_attach, pasid);
+ kfree(attach);
}
- if (hwpt_paging)
+ if (hwpt_paging && pasid == IOMMU_NO_PASID)
iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
- mutex_unlock(&idev->igroup->lock);
+ mutex_unlock(&igroup->lock);
/* Caller must destroy hwpt */
return hwpt;
}
static struct iommufd_hw_pagetable *
-iommufd_device_do_attach(struct iommufd_device *idev,
+iommufd_device_do_attach(struct iommufd_device *idev, ioasid_t pasid,
struct iommufd_hw_pagetable *hwpt)
{
int rc;
- rc = iommufd_hw_pagetable_attach(hwpt, idev);
+ rc = iommufd_hw_pagetable_attach(hwpt, idev, pasid);
if (rc)
return ERR_PTR(rc);
return NULL;
@@ -654,11 +682,14 @@ static void
iommufd_group_remove_reserved_iova(struct iommufd_group *igroup,
struct iommufd_hwpt_paging *hwpt_paging)
{
+ struct iommufd_attach *attach;
struct iommufd_device *cur;
+ unsigned long index;
lockdep_assert_held(&igroup->lock);
- list_for_each_entry(cur, &igroup->device_list, group_item)
+ attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
+ xa_for_each(&attach->device_array, index, cur)
iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev);
}
@@ -667,14 +698,17 @@ iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup,
struct iommufd_hwpt_paging *hwpt_paging)
{
struct iommufd_hwpt_paging *old_hwpt_paging;
+ struct iommufd_attach *attach;
struct iommufd_device *cur;
+ unsigned long index;
int rc;
lockdep_assert_held(&igroup->lock);
- old_hwpt_paging = find_hwpt_paging(igroup->hwpt);
+ attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
+ old_hwpt_paging = find_hwpt_paging(attach->hwpt);
if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) {
- list_for_each_entry(cur, &igroup->device_list, group_item) {
+ xa_for_each(&attach->device_array, index, cur) {
rc = iopt_table_enforce_dev_resv_regions(
&hwpt_paging->ioas->iopt, cur->dev, NULL);
if (rc)
@@ -693,69 +727,81 @@ err_unresv:
}
static struct iommufd_hw_pagetable *
-iommufd_device_do_replace(struct iommufd_device *idev,
+iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid,
struct iommufd_hw_pagetable *hwpt)
{
struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
+ bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID;
struct iommufd_hwpt_paging *old_hwpt_paging;
struct iommufd_group *igroup = idev->igroup;
struct iommufd_hw_pagetable *old_hwpt;
+ struct iommufd_attach *attach;
unsigned int num_devices;
int rc;
- mutex_lock(&idev->igroup->lock);
+ mutex_lock(&igroup->lock);
+
+ attach = xa_load(&igroup->pasid_attach, pasid);
+ if (!attach) {
+ rc = -EINVAL;
+ goto err_unlock;
+ }
+
+ old_hwpt = attach->hwpt;
- if (igroup->hwpt == NULL) {
+ WARN_ON(!old_hwpt || xa_empty(&attach->device_array));
+
+ if (!iommufd_device_is_attached(idev, pasid)) {
rc = -EINVAL;
goto err_unlock;
}
- if (hwpt == igroup->hwpt) {
- mutex_unlock(&idev->igroup->lock);
+ if (hwpt == old_hwpt) {
+ mutex_unlock(&igroup->lock);
return NULL;
}
- old_hwpt = igroup->hwpt;
- if (hwpt_paging) {
+ if (attach_resv) {
rc = iommufd_group_do_replace_reserved_iova(igroup, hwpt_paging);
if (rc)
goto err_unlock;
}
- rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt);
+ rc = iommufd_hwpt_replace_device(idev, pasid, hwpt, old_hwpt);
if (rc)
goto err_unresv;
old_hwpt_paging = find_hwpt_paging(old_hwpt);
- if (old_hwpt_paging &&
+ if (old_hwpt_paging && pasid == IOMMU_NO_PASID &&
(!hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas))
iommufd_group_remove_reserved_iova(igroup, old_hwpt_paging);
- igroup->hwpt = hwpt;
+ attach->hwpt = hwpt;
- num_devices = list_count_nodes(&igroup->device_list);
+ num_devices = iommufd_group_device_num(igroup, pasid);
/*
- * Move the refcounts held by the device_list to the new hwpt. Retain a
+ * Move the refcounts held by the device_array to the new hwpt. Retain a
* refcount for this thread as the caller will free it.
*/
refcount_add(num_devices, &hwpt->obj.users);
if (num_devices > 1)
WARN_ON(refcount_sub_and_test(num_devices - 1,
&old_hwpt->obj.users));
- mutex_unlock(&idev->igroup->lock);
+ mutex_unlock(&igroup->lock);
/* Caller must destroy old_hwpt */
return old_hwpt;
err_unresv:
- if (hwpt_paging)
+ if (attach_resv)
iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
err_unlock:
- mutex_unlock(&idev->igroup->lock);
+ mutex_unlock(&igroup->lock);
return ERR_PTR(rc);
}
typedef struct iommufd_hw_pagetable *(*attach_fn)(
- struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt);
+ struct iommufd_device *idev, ioasid_t pasid,
+ struct iommufd_hw_pagetable *hwpt);
/*
* When automatically managing the domains we search for a compatible domain in
@@ -763,7 +809,7 @@ typedef struct iommufd_hw_pagetable *(*attach_fn)(
* Automatic domain selection will never pick a manually created domain.
*/
static struct iommufd_hw_pagetable *
-iommufd_device_auto_get_domain(struct iommufd_device *idev,
+iommufd_device_auto_get_domain(struct iommufd_device *idev, ioasid_t pasid,
struct iommufd_ioas *ioas, u32 *pt_id,
attach_fn do_attach)
{
@@ -792,7 +838,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
hwpt = &hwpt_paging->common;
if (!iommufd_lock_obj(&hwpt->obj))
continue;
- destroy_hwpt = (*do_attach)(idev, hwpt);
+ destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
if (IS_ERR(destroy_hwpt)) {
iommufd_put_object(idev->ictx, &hwpt->obj);
/*
@@ -810,8 +856,8 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
goto out_unlock;
}
- hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0,
- immediate_attach, NULL);
+ hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, pasid,
+ 0, immediate_attach, NULL);
if (IS_ERR(hwpt_paging)) {
destroy_hwpt = ERR_CAST(hwpt_paging);
goto out_unlock;
@@ -819,7 +865,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
hwpt = &hwpt_paging->common;
if (!immediate_attach) {
- destroy_hwpt = (*do_attach)(idev, hwpt);
+ destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
if (IS_ERR(destroy_hwpt))
goto out_abort;
} else {
@@ -840,8 +886,9 @@ out_unlock:
return destroy_hwpt;
}
-static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
- attach_fn do_attach)
+static int iommufd_device_change_pt(struct iommufd_device *idev,
+ ioasid_t pasid,
+ u32 *pt_id, attach_fn do_attach)
{
struct iommufd_hw_pagetable *destroy_hwpt;
struct iommufd_object *pt_obj;
@@ -856,7 +903,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
struct iommufd_hw_pagetable *hwpt =
container_of(pt_obj, struct iommufd_hw_pagetable, obj);
- destroy_hwpt = (*do_attach)(idev, hwpt);
+ destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
if (IS_ERR(destroy_hwpt))
goto out_put_pt_obj;
break;
@@ -865,8 +912,8 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
struct iommufd_ioas *ioas =
container_of(pt_obj, struct iommufd_ioas, obj);
- destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id,
- do_attach);
+ destroy_hwpt = iommufd_device_auto_get_domain(idev, pasid, ioas,
+ pt_id, do_attach);
if (IS_ERR(destroy_hwpt))
goto out_put_pt_obj;
break;
@@ -888,22 +935,26 @@ out_put_pt_obj:
}
/**
- * iommufd_device_attach - Connect a device to an iommu_domain
+ * iommufd_device_attach - Connect a device/pasid to an iommu_domain
* @idev: device to attach
+ * @pasid: pasid to attach
* @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
* Output the IOMMUFD_OBJ_HWPT_PAGING ID
*
- * This connects the device to an iommu_domain, either automatically or manually
- * selected. Once this completes the device could do DMA.
+ * This connects the device/pasid to an iommu_domain, either automatically
+ * or manually selected. Once this completes the device could do DMA with
+ * @pasid. @pasid is IOMMU_NO_PASID if this attach is for no pasid usage.
*
* The caller should return the resulting pt_id back to userspace.
* This function is undone by calling iommufd_device_detach().
*/
-int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
+int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid,
+ u32 *pt_id)
{
int rc;
- rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach);
+ rc = iommufd_device_change_pt(idev, pasid, pt_id,
+ &iommufd_device_do_attach);
if (rc)
return rc;
@@ -917,8 +968,9 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD");
/**
- * iommufd_device_replace - Change the device's iommu_domain
+ * iommufd_device_replace - Change the device/pasid's iommu_domain
* @idev: device to change
+ * @pasid: pasid to change
* @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
* Output the IOMMUFD_OBJ_HWPT_PAGING ID
*
@@ -929,27 +981,33 @@ EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD");
*
* If it fails then no change is made to the attachment. The iommu driver may
* implement this so there is no disruption in translation. This can only be
- * called if iommufd_device_attach() has already succeeded.
+ * called if iommufd_device_attach() has already succeeded. @pasid is
+ * IOMMU_NO_PASID for no pasid usage.
*/
-int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id)
+int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid,
+ u32 *pt_id)
{
- return iommufd_device_change_pt(idev, pt_id,
+ return iommufd_device_change_pt(idev, pasid, pt_id,
&iommufd_device_do_replace);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, "IOMMUFD");
/**
- * iommufd_device_detach - Disconnect a device to an iommu_domain
+ * iommufd_device_detach - Disconnect a device/pasid from an iommu_domain
* @idev: device to detach
+ * @pasid: pasid to detach
*
* Undo iommufd_device_attach(). This disconnects the idev from the previously
* attached pt_id. The device returns back to a blocked DMA translation.
+ * @pasid is IOMMU_NO_PASID for no pasid usage.
*/
-void iommufd_device_detach(struct iommufd_device *idev)
+void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid)
{
struct iommufd_hw_pagetable *hwpt;
- hwpt = iommufd_hw_pagetable_detach(idev);
+ hwpt = iommufd_hw_pagetable_detach(idev, pasid);
+ if (!hwpt)
+ return;
iommufd_hw_pagetable_put(idev->ictx, hwpt);
refcount_dec(&idev->obj.users);
}
@@ -1349,7 +1407,7 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
struct io_pagetable *iopt;
struct iopt_area *area;
unsigned long last_iova;
- int rc;
+ int rc = -EINVAL;
if (!length)
return -EINVAL;
@@ -1405,7 +1463,8 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
void *data;
int rc;
- if (cmd->flags || cmd->__reserved)
+ if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] ||
+ cmd->__reserved[2])
return -EOPNOTSUPP;
idev = iommufd_get_device(ucmd, cmd->dev_id);
@@ -1462,6 +1521,36 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING;
+ cmd->out_max_pasid_log2 = 0;
+ /*
+ * Currently, all iommu drivers enable PASID in the probe_device()
+ * op if iommu and device supports it. So the max_pasids stored in
+ * dev->iommu indicates both PASID support and enable status. A
+ * non-zero dev->iommu->max_pasids means PASID is supported and
+ * enabled. The iommufd only reports PASID capability to userspace
+ * if it's enabled.
+ */
+ if (idev->dev->iommu->max_pasids) {
+ cmd->out_max_pasid_log2 = ilog2(idev->dev->iommu->max_pasids);
+
+ if (dev_is_pci(idev->dev)) {
+ struct pci_dev *pdev = to_pci_dev(idev->dev);
+ int ctrl;
+
+ ctrl = pci_pasid_status(pdev);
+
+ WARN_ON_ONCE(ctrl < 0 ||
+ !(ctrl & PCI_PASID_CTRL_ENABLE));
+
+ if (ctrl & PCI_PASID_CTRL_EXEC)
+ cmd->out_capabilities |=
+ IOMMU_HW_CAP_PCI_PASID_EXEC;
+ if (ctrl & PCI_PASID_CTRL_PRIV)
+ cmd->out_capabilities |=
+ IOMMU_HW_CAP_PCI_PASID_PRIV;
+ }
+ }
+
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
out_free:
kfree(data);
diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c
index 2d98b04ff1cb..922cd1fe7ec2 100644
--- a/drivers/iommu/iommufd/driver.c
+++ b/drivers/iommu/iommufd/driver.c
@@ -49,5 +49,203 @@ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
}
EXPORT_SYMBOL_NS_GPL(iommufd_viommu_find_dev, "IOMMUFD");
+/* Return -ENOENT if device is not associated to the vIOMMU */
+int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu,
+ struct device *dev, unsigned long *vdev_id)
+{
+ struct iommufd_vdevice *vdev;
+ unsigned long index;
+ int rc = -ENOENT;
+
+ if (WARN_ON_ONCE(!vdev_id))
+ return -EINVAL;
+
+ xa_lock(&viommu->vdevs);
+ xa_for_each(&viommu->vdevs, index, vdev) {
+ if (vdev->dev == dev) {
+ *vdev_id = vdev->id;
+ rc = 0;
+ break;
+ }
+ }
+ xa_unlock(&viommu->vdevs);
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_viommu_get_vdev_id, "IOMMUFD");
+
+/*
+ * Typically called in driver's threaded IRQ handler.
+ * The @type and @event_data must be defined in include/uapi/linux/iommufd.h
+ */
+int iommufd_viommu_report_event(struct iommufd_viommu *viommu,
+ enum iommu_veventq_type type, void *event_data,
+ size_t data_len)
+{
+ struct iommufd_veventq *veventq;
+ struct iommufd_vevent *vevent;
+ int rc = 0;
+
+ if (WARN_ON_ONCE(!data_len || !event_data))
+ return -EINVAL;
+
+ down_read(&viommu->veventqs_rwsem);
+
+ veventq = iommufd_viommu_find_veventq(viommu, type);
+ if (!veventq) {
+ rc = -EOPNOTSUPP;
+ goto out_unlock_veventqs;
+ }
+
+ spin_lock(&veventq->common.lock);
+ if (veventq->num_events == veventq->depth) {
+ vevent = &veventq->lost_events_header;
+ goto out_set_header;
+ }
+
+ vevent = kzalloc(struct_size(vevent, event_data, data_len), GFP_ATOMIC);
+ if (!vevent) {
+ rc = -ENOMEM;
+ vevent = &veventq->lost_events_header;
+ goto out_set_header;
+ }
+ memcpy(vevent->event_data, event_data, data_len);
+ vevent->data_len = data_len;
+ veventq->num_events++;
+
+out_set_header:
+ iommufd_vevent_handler(veventq, vevent);
+ spin_unlock(&veventq->common.lock);
+out_unlock_veventqs:
+ up_read(&viommu->veventqs_rwsem);
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_viommu_report_event, "IOMMUFD");
+
+#ifdef CONFIG_IRQ_MSI_IOMMU
+/*
+ * Get an iommufd_sw_msi_map for the MSI physical address requested by the irq
+ * layer. The mapping to IOVA is global to the iommufd file descriptor, every
+ * domain that is attached to a device using the same MSI parameters will use
+ * the same IOVA.
+ */
+static struct iommufd_sw_msi_map *
+iommufd_sw_msi_get_map(struct iommufd_ctx *ictx, phys_addr_t msi_addr,
+ phys_addr_t sw_msi_start)
+{
+ struct iommufd_sw_msi_map *cur;
+ unsigned int max_pgoff = 0;
+
+ lockdep_assert_held(&ictx->sw_msi_lock);
+
+ list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) {
+ if (cur->sw_msi_start != sw_msi_start)
+ continue;
+ max_pgoff = max(max_pgoff, cur->pgoff + 1);
+ if (cur->msi_addr == msi_addr)
+ return cur;
+ }
+
+ if (ictx->sw_msi_id >=
+ BITS_PER_BYTE * sizeof_field(struct iommufd_sw_msi_maps, bitmap))
+ return ERR_PTR(-EOVERFLOW);
+
+ cur = kzalloc(sizeof(*cur), GFP_KERNEL);
+ if (!cur)
+ return ERR_PTR(-ENOMEM);
+
+ cur->sw_msi_start = sw_msi_start;
+ cur->msi_addr = msi_addr;
+ cur->pgoff = max_pgoff;
+ cur->id = ictx->sw_msi_id++;
+ list_add_tail(&cur->sw_msi_item, &ictx->sw_msi_list);
+ return cur;
+}
+
+int iommufd_sw_msi_install(struct iommufd_ctx *ictx,
+ struct iommufd_hwpt_paging *hwpt_paging,
+ struct iommufd_sw_msi_map *msi_map)
+{
+ unsigned long iova;
+
+ lockdep_assert_held(&ictx->sw_msi_lock);
+
+ iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE;
+ if (!test_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap)) {
+ int rc;
+
+ rc = iommu_map(hwpt_paging->common.domain, iova,
+ msi_map->msi_addr, PAGE_SIZE,
+ IOMMU_WRITE | IOMMU_READ | IOMMU_MMIO,
+ GFP_KERNEL_ACCOUNT);
+ if (rc)
+ return rc;
+ __set_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap);
+ }
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_sw_msi_install, "IOMMUFD_INTERNAL");
+
+/*
+ * Called by the irq code if the platform translates the MSI address through the
+ * IOMMU. msi_addr is the physical address of the MSI page. iommufd will
+ * allocate an fd-global IOVA for the physical page that is the same on all
+ * domains and devices.
+ */
+int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
+ phys_addr_t msi_addr)
+{
+ struct device *dev = msi_desc_to_dev(desc);
+ struct iommufd_hwpt_paging *hwpt_paging;
+ struct iommu_attach_handle *raw_handle;
+ struct iommufd_attach_handle *handle;
+ struct iommufd_sw_msi_map *msi_map;
+ struct iommufd_ctx *ictx;
+ unsigned long iova;
+ int rc;
+
+ /*
+ * It is safe to call iommu_attach_handle_get() here because the iommu
+ * core code invokes this under the group mutex which also prevents any
+ * change of the attach handle for the duration of this function.
+ */
+ iommu_group_mutex_assert(dev);
+
+ raw_handle =
+ iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0);
+ if (IS_ERR(raw_handle))
+ return 0;
+ hwpt_paging = find_hwpt_paging(domain->iommufd_hwpt);
+
+ handle = to_iommufd_handle(raw_handle);
+ /* No IOMMU_RESV_SW_MSI means no change to the msi_msg */
+ if (handle->idev->igroup->sw_msi_start == PHYS_ADDR_MAX)
+ return 0;
+
+ ictx = handle->idev->ictx;
+ guard(mutex)(&ictx->sw_msi_lock);
+ /*
+ * The input msi_addr is the exact byte offset of the MSI doorbell, we
+ * assume the caller has checked that it is contained with a MMIO region
+ * that is secure to map at PAGE_SIZE.
+ */
+ msi_map = iommufd_sw_msi_get_map(handle->idev->ictx,
+ msi_addr & PAGE_MASK,
+ handle->idev->igroup->sw_msi_start);
+ if (IS_ERR(msi_map))
+ return PTR_ERR(msi_map);
+
+ rc = iommufd_sw_msi_install(ictx, hwpt_paging, msi_map);
+ if (rc)
+ return rc;
+ __set_bit(msi_map->id, handle->idev->igroup->required_sw_msi.bitmap);
+
+ iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE;
+ msi_desc_set_iommu_msi_iova(desc, iova, PAGE_SHIFT);
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_sw_msi, "IOMMUFD");
+#endif
+
MODULE_DESCRIPTION("iommufd code shared with builtin modules");
+MODULE_IMPORT_NS("IOMMUFD_INTERNAL");
MODULE_LICENSE("GPL");
diff --git a/drivers/iommu/iommufd/eventq.c b/drivers/iommu/iommufd/eventq.c
new file mode 100644
index 000000000000..f39cf0797347
--- /dev/null
+++ b/drivers/iommu/iommufd/eventq.c
@@ -0,0 +1,598 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2024 Intel Corporation
+ */
+#define pr_fmt(fmt) "iommufd: " fmt
+
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/iommufd.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/pci-ats.h>
+#include <linux/poll.h>
+#include <uapi/linux/iommufd.h>
+
+#include "../iommu-priv.h"
+#include "iommufd_private.h"
+
+/* IOMMUFD_OBJ_FAULT Functions */
+
+int iommufd_fault_iopf_enable(struct iommufd_device *idev)
+{
+ struct device *dev = idev->dev;
+ int ret;
+
+ /*
+ * Once we turn on PCI/PRI support for VF, the response failure code
+ * should not be forwarded to the hardware due to PRI being a shared
+ * resource between PF and VFs. There is no coordination for this
+ * shared capability. This waits for a vPRI reset to recover.
+ */
+ if (dev_is_pci(dev)) {
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (pdev->is_virtfn && pci_pri_supported(pdev))
+ return -EINVAL;
+ }
+
+ mutex_lock(&idev->iopf_lock);
+ /* Device iopf has already been on. */
+ if (++idev->iopf_enabled > 1) {
+ mutex_unlock(&idev->iopf_lock);
+ return 0;
+ }
+
+ ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
+ if (ret)
+ --idev->iopf_enabled;
+ mutex_unlock(&idev->iopf_lock);
+
+ return ret;
+}
+
+void iommufd_fault_iopf_disable(struct iommufd_device *idev)
+{
+ mutex_lock(&idev->iopf_lock);
+ if (!WARN_ON(idev->iopf_enabled == 0)) {
+ if (--idev->iopf_enabled == 0)
+ iommu_dev_disable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF);
+ }
+ mutex_unlock(&idev->iopf_lock);
+}
+
+void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_attach_handle *handle)
+{
+ struct iommufd_fault *fault = hwpt->fault;
+ struct iopf_group *group, *next;
+ struct list_head free_list;
+ unsigned long index;
+
+ if (!fault)
+ return;
+ INIT_LIST_HEAD(&free_list);
+
+ mutex_lock(&fault->mutex);
+ spin_lock(&fault->common.lock);
+ list_for_each_entry_safe(group, next, &fault->common.deliver, node) {
+ if (group->attach_handle != &handle->handle)
+ continue;
+ list_move(&group->node, &free_list);
+ }
+ spin_unlock(&fault->common.lock);
+
+ list_for_each_entry_safe(group, next, &free_list, node) {
+ list_del(&group->node);
+ iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+ iopf_free_group(group);
+ }
+
+ xa_for_each(&fault->response, index, group) {
+ if (group->attach_handle != &handle->handle)
+ continue;
+ xa_erase(&fault->response, index);
+ iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+ iopf_free_group(group);
+ }
+ mutex_unlock(&fault->mutex);
+}
+
+void iommufd_fault_destroy(struct iommufd_object *obj)
+{
+ struct iommufd_eventq *eventq =
+ container_of(obj, struct iommufd_eventq, obj);
+ struct iommufd_fault *fault = eventq_to_fault(eventq);
+ struct iopf_group *group, *next;
+ unsigned long index;
+
+ /*
+ * The iommufd object's reference count is zero at this point.
+ * We can be confident that no other threads are currently
+ * accessing this pointer. Therefore, acquiring the mutex here
+ * is unnecessary.
+ */
+ list_for_each_entry_safe(group, next, &fault->common.deliver, node) {
+ list_del(&group->node);
+ iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+ iopf_free_group(group);
+ }
+ xa_for_each(&fault->response, index, group) {
+ xa_erase(&fault->response, index);
+ iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+ iopf_free_group(group);
+ }
+ xa_destroy(&fault->response);
+ mutex_destroy(&fault->mutex);
+}
+
+static void iommufd_compose_fault_message(struct iommu_fault *fault,
+ struct iommu_hwpt_pgfault *hwpt_fault,
+ struct iommufd_device *idev,
+ u32 cookie)
+{
+ hwpt_fault->flags = fault->prm.flags;
+ hwpt_fault->dev_id = idev->obj.id;
+ hwpt_fault->pasid = fault->prm.pasid;
+ hwpt_fault->grpid = fault->prm.grpid;
+ hwpt_fault->perm = fault->prm.perm;
+ hwpt_fault->addr = fault->prm.addr;
+ hwpt_fault->length = 0;
+ hwpt_fault->cookie = cookie;
+}
+
+/* Fetch the first node out of the fault->deliver list */
+static struct iopf_group *
+iommufd_fault_deliver_fetch(struct iommufd_fault *fault)
+{
+ struct list_head *list = &fault->common.deliver;
+ struct iopf_group *group = NULL;
+
+ spin_lock(&fault->common.lock);
+ if (!list_empty(list)) {
+ group = list_first_entry(list, struct iopf_group, node);
+ list_del(&group->node);
+ }
+ spin_unlock(&fault->common.lock);
+ return group;
+}
+
+/* Restore a node back to the head of the fault->deliver list */
+static void iommufd_fault_deliver_restore(struct iommufd_fault *fault,
+ struct iopf_group *group)
+{
+ spin_lock(&fault->common.lock);
+ list_add(&group->node, &fault->common.deliver);
+ spin_unlock(&fault->common.lock);
+}
+
+/*
+ * Read pending I/O page faults from a fault object's file descriptor.
+ *
+ * Fault groups are taken off the deliver list one at a time. Each group is
+ * stored in fault->response under a newly allocated cookie and every fault of
+ * the group is copied to @buf as a struct iommu_hwpt_pgfault carrying that
+ * cookie. A group is only handed out whole: if it does not fit in the
+ * remaining space, or anything fails while it is being processed, it is put
+ * back at the head of the deliver list and reading stops.
+ *
+ * @count must be a multiple of sizeof(struct iommu_hwpt_pgfault) and *@ppos
+ * must be zero, otherwise -ESPIPE is returned.
+ *
+ * Return: the number of bytes copied to @buf when at least one group was
+ * delivered, otherwise 0 or the negative error code that stopped the read.
+ */
+static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
+				       size_t count, loff_t *ppos)
+{
+	size_t fault_size = sizeof(struct iommu_hwpt_pgfault);
+	struct iommufd_eventq *eventq = filep->private_data;
+	struct iommufd_fault *fault = eventq_to_fault(eventq);
+	struct iommu_hwpt_pgfault data = {};
+	struct iommufd_device *idev;
+	struct iopf_group *group;
+	struct iopf_fault *iopf;
+	size_t group_start;
+	size_t done = 0;
+	int rc = 0;
+
+	if (*ppos || count % fault_size)
+		return -ESPIPE;
+
+	mutex_lock(&fault->mutex);
+	while ((group = iommufd_fault_deliver_fetch(fault))) {
+		/* Never split a group across two reads */
+		if (done >= count ||
+		    group->fault_count * fault_size > count - done) {
+			iommufd_fault_deliver_restore(fault, group);
+			break;
+		}
+
+		rc = xa_alloc(&fault->response, &group->cookie, group,
+			      xa_limit_32b, GFP_KERNEL);
+		if (rc) {
+			iommufd_fault_deliver_restore(fault, group);
+			break;
+		}
+
+		group_start = done;
+		idev = to_iommufd_handle(group->attach_handle)->idev;
+		list_for_each_entry(iopf, &group->faults, list) {
+			iommufd_compose_fault_message(&iopf->fault,
+						      &data, idev,
+						      group->cookie);
+			if (copy_to_user(buf + done, &data, fault_size)) {
+				xa_erase(&fault->response, group->cookie);
+				iommufd_fault_deliver_restore(fault, group);
+				/*
+				 * The whole group is queued again, so do not
+				 * report the records already copied for it;
+				 * their cookie is no longer valid.
+				 */
+				done = group_start;
+				rc = -EFAULT;
+				break;
+			}
+			done += fault_size;
+		}
+		/*
+		 * The break above only leaves the inner loop. Stop here too,
+		 * otherwise the group that was just restored would be fetched
+		 * and retried again.
+		 */
+		if (rc)
+			break;
+	}
+	mutex_unlock(&fault->mutex);
+
+	return done == 0 ? rc : done;
+}
+
+/*
+ * Consume page responses written to a fault object's file descriptor.
+ *
+ * @buf holds an array of struct iommu_hwpt_page_response. For each entry the
+ * fault group stored in fault->response under the entry's cookie is removed,
+ * answered with the given response code and freed. Processing stops at the
+ * first entry that cannot be copied in (-EFAULT), carries a response code
+ * other than IOMMUFD_PAGE_RESP_SUCCESS/IOMMUFD_PAGE_RESP_INVALID (-EINVAL),
+ * or names a cookie with no stored group (-EINVAL).
+ *
+ * @count must be a multiple of sizeof(struct iommu_hwpt_page_response) and
+ * *@ppos must be zero, otherwise -ESPIPE is returned.
+ *
+ * Return: the number of bytes consumed when at least one response was
+ * handled, otherwise 0 or the negative error code that stopped the write.
+ */
+static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	size_t response_size = sizeof(struct iommu_hwpt_page_response);
+	struct iommufd_eventq *eventq = filep->private_data;
+	struct iommufd_fault *fault = eventq_to_fault(eventq);
+	struct iommu_hwpt_page_response response;
+	struct iopf_group *group;
+	size_t done = 0;
+	int rc = 0;
+
+	if (*ppos || count % response_size)
+		return -ESPIPE;
+
+	mutex_lock(&fault->mutex);
+	while (count > done) {
+		/*
+		 * copy_from_user() returns the number of bytes left uncopied,
+		 * not an errno; translate it so a positive value is never
+		 * returned to the caller as a byte count.
+		 */
+		if (copy_from_user(&response, buf + done, response_size)) {
+			rc = -EFAULT;
+			break;
+		}
+
+		static_assert((int)IOMMUFD_PAGE_RESP_SUCCESS ==
+			      (int)IOMMU_PAGE_RESP_SUCCESS);
+		static_assert((int)IOMMUFD_PAGE_RESP_INVALID ==
+			      (int)IOMMU_PAGE_RESP_INVALID);
+		if (response.code != IOMMUFD_PAGE_RESP_SUCCESS &&
+		    response.code != IOMMUFD_PAGE_RESP_INVALID) {
+			rc = -EINVAL;
+			break;
+		}
+
+		group = xa_erase(&fault->response, response.cookie);
+		if (!group) {
+			rc = -EINVAL;
+			break;
+		}
+
+		iopf_group_response(group, response.code);
+		iopf_free_group(group);
+		done += response_size;
+	}
+	mutex_unlock(&fault->mutex);
+
+	return done == 0 ? rc : done;
+}
+
+/* IOMMUFD_OBJ_VEVENTQ Functions */
+
+void iommufd_veventq_abort(struct iommufd_object *obj)
+{
+ struct iommufd_eventq *eventq =
+ container_of(obj, struct iommufd_eventq, obj);
+ struct iommufd_veventq *veventq = eventq_to_veventq(eventq);
+ struct iommufd_viommu *viommu = veventq->viommu;
+ struct iommufd_vevent *cur, *next;
+
+ lockdep_assert_held_write(&viommu->veventqs_rwsem);
+
+ list_for_each_entry_safe(cur, next, &eventq->deliver, node) {
+ list_del(&cur->node);
+ if (cur != &veventq->lost_events_header)
+ kfree(cur);
+ }
+
+ refcount_dec(&viommu->obj.users);
+ list_del(&veventq->node);
+}
+
+void iommufd_veventq_destroy(struct iommufd_object *obj)
+{
+ struct iommufd_veventq *veventq = eventq_to_veventq(
+ container_of(obj, struct iommufd_eventq, obj));
+
+ down_write(&veventq->viommu->veventqs_rwsem);
+ iommufd_veventq_abort(obj);
+ up_write(&veventq->viommu->veventqs_rwsem);
+}
+
+static struct iommufd_vevent *
+iommufd_veventq_deliver_fetch(struct iommufd_veventq *veventq)
+{
+ struct iommufd_eventq *eventq = &veventq->common;
+ struct list_head *list = &eventq->deliver;
+ struct iommufd_vevent *vevent = NULL;
+
+ spin_lock(&eventq->lock);
+ if (!list_empty(list)) {
+ struct iommufd_vevent *next;
+
+ next = list_first_entry(list, struct iommufd_vevent, node);
+ /* Make a copy of the lost_events_header for copy_to_user */
+ if (next == &veventq->lost_events_header) {
+ vevent = kzalloc(sizeof(*vevent), GFP_ATOMIC);
+ if (!vevent)
+ goto out_unlock;
+ }
+ list_del(&next->node);
+ if (vevent)
+ memcpy(vevent, next, sizeof(*vevent));
+ else
+ vevent = next;
+ }
+out_unlock:
+ spin_unlock(&eventq->lock);
+ return vevent;
+}
+
+static void iommufd_veventq_deliver_restore(struct iommufd_veventq *veventq,
+ struct iommufd_vevent *vevent)
+{
+ struct iommufd_eventq *eventq = &veventq->common;
+ struct list_head *list = &eventq->deliver;
+
+ spin_lock(&eventq->lock);
+ if (vevent_for_lost_events_header(vevent)) {
+ /* Remove the copy of the lost_events_header */
+ kfree(vevent);
+ vevent = NULL;
+ /* An empty list needs the lost_events_header back */
+ if (list_empty(list))
+ vevent = &veventq->lost_events_header;
+ }
+ if (vevent)
+ list_add(&vevent->node, list);
+ spin_unlock(&eventq->lock);
+}
+
+/*
+ * Read queued vEVENTs from a vEVENTQ file descriptor.
+ *
+ * Events are fetched from the deliver list one at a time. Each one is copied
+ * to @buf as a struct iommufd_vevent_header followed by cur->data_len bytes of
+ * event data, if any. An event is only handed out whole: if it does not fit in
+ * the remaining space or a copy to user space fails, it is restored to the
+ * deliver list and reading stops.
+ *
+ * *@ppos must be zero, otherwise -ESPIPE is returned.
+ *
+ * Return: the number of bytes copied to @buf when at least one event was
+ * delivered, otherwise 0 or -EFAULT.
+ */
+static ssize_t iommufd_veventq_fops_read(struct file *filep, char __user *buf,
+					 size_t count, loff_t *ppos)
+{
+	struct iommufd_eventq *eventq = filep->private_data;
+	struct iommufd_veventq *veventq = eventq_to_veventq(eventq);
+	struct iommufd_vevent_header *hdr;
+	struct iommufd_vevent *cur;
+	size_t done = 0;
+	int rc = 0;
+
+	if (*ppos)
+		return -ESPIPE;
+
+	while ((cur = iommufd_veventq_deliver_fetch(veventq))) {
+		/* Validate the remaining bytes against the header size */
+		if (done >= count || sizeof(*hdr) > count - done) {
+			iommufd_veventq_deliver_restore(veventq, cur);
+			break;
+		}
+		hdr = &cur->header;
+
+		/*
+		 * For a normal vEVENT, validate against the full size. This
+		 * must use the size of the header structure, not the size of
+		 * the pointer to it.
+		 */
+		if (!vevent_for_lost_events_header(cur) &&
+		    sizeof(*hdr) + cur->data_len > count - done) {
+			iommufd_veventq_deliver_restore(veventq, cur);
+			break;
+		}
+
+		if (copy_to_user(buf + done, hdr, sizeof(*hdr))) {
+			iommufd_veventq_deliver_restore(veventq, cur);
+			rc = -EFAULT;
+			break;
+		}
+
+		/*
+		 * Only account for the header once the payload has been copied
+		 * as well; a restored event is delivered again in full later.
+		 */
+		if (cur->data_len &&
+		    copy_to_user(buf + done + sizeof(*hdr), cur->event_data,
+				 cur->data_len)) {
+			iommufd_veventq_deliver_restore(veventq, cur);
+			rc = -EFAULT;
+			break;
+		}
+		spin_lock(&eventq->lock);
+		if (!vevent_for_lost_events_header(cur))
+			veventq->num_events--;
+		spin_unlock(&eventq->lock);
+		done += sizeof(*hdr) + cur->data_len;
+		kfree(cur);
+	}
+
+	return done == 0 ? rc : done;
+}
+
+/* Common Event Queue Functions */
+
+static __poll_t iommufd_eventq_fops_poll(struct file *filep,
+ struct poll_table_struct *wait)
+{
+ struct iommufd_eventq *eventq = filep->private_data;
+ __poll_t pollflags = 0;
+
+ if (eventq->obj.type == IOMMUFD_OBJ_FAULT)
+ pollflags |= EPOLLOUT;
+
+ poll_wait(filep, &eventq->wait_queue, wait);
+ spin_lock(&eventq->lock);
+ if (!list_empty(&eventq->deliver))
+ pollflags |= EPOLLIN | EPOLLRDNORM;
+ spin_unlock(&eventq->lock);
+
+ return pollflags;
+}
+
+static int iommufd_eventq_fops_release(struct inode *inode, struct file *filep)
+{
+ struct iommufd_eventq *eventq = filep->private_data;
+
+ refcount_dec(&eventq->obj.users);
+ iommufd_ctx_put(eventq->ictx);
+ return 0;
+}
+
+#define INIT_EVENTQ_FOPS(read_op, write_op) \
+ ((const struct file_operations){ \
+ .owner = THIS_MODULE, \
+ .open = nonseekable_open, \
+ .read = read_op, \
+ .write = write_op, \
+ .poll = iommufd_eventq_fops_poll, \
+ .release = iommufd_eventq_fops_release, \
+ })
+
+static int iommufd_eventq_init(struct iommufd_eventq *eventq, char *name,
+ struct iommufd_ctx *ictx,
+ const struct file_operations *fops)
+{
+ struct file *filep;
+ int fdno;
+
+ spin_lock_init(&eventq->lock);
+ INIT_LIST_HEAD(&eventq->deliver);
+ init_waitqueue_head(&eventq->wait_queue);
+
+ filep = anon_inode_getfile(name, fops, eventq, O_RDWR);
+ if (IS_ERR(filep))
+ return PTR_ERR(filep);
+
+ eventq->ictx = ictx;
+ iommufd_ctx_get(eventq->ictx);
+ eventq->filep = filep;
+ refcount_inc(&eventq->obj.users);
+
+ fdno = get_unused_fd_flags(O_CLOEXEC);
+ if (fdno < 0)
+ fput(filep);
+ return fdno;
+}
+
+static const struct file_operations iommufd_fault_fops =
+ INIT_EVENTQ_FOPS(iommufd_fault_fops_read, iommufd_fault_fops_write);
+
+int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_fault_alloc *cmd = ucmd->cmd;
+ struct iommufd_fault *fault;
+ int fdno;
+ int rc;
+
+ if (cmd->flags)
+ return -EOPNOTSUPP;
+
+ fault = __iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT,
+ common.obj);
+ if (IS_ERR(fault))
+ return PTR_ERR(fault);
+
+ xa_init_flags(&fault->response, XA_FLAGS_ALLOC1);
+ mutex_init(&fault->mutex);
+
+ fdno = iommufd_eventq_init(&fault->common, "[iommufd-pgfault]",
+ ucmd->ictx, &iommufd_fault_fops);
+ if (fdno < 0) {
+ rc = fdno;
+ goto out_abort;
+ }
+
+ cmd->out_fault_id = fault->common.obj.id;
+ cmd->out_fault_fd = fdno;
+
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+ if (rc)
+ goto out_put_fdno;
+ iommufd_object_finalize(ucmd->ictx, &fault->common.obj);
+
+ fd_install(fdno, fault->common.filep);
+
+ return 0;
+out_put_fdno:
+ put_unused_fd(fdno);
+ fput(fault->common.filep);
+out_abort:
+ iommufd_object_abort_and_destroy(ucmd->ictx, &fault->common.obj);
+
+ return rc;
+}
+
+int iommufd_fault_iopf_handler(struct iopf_group *group)
+{
+ struct iommufd_hw_pagetable *hwpt;
+ struct iommufd_fault *fault;
+
+ hwpt = group->attach_handle->domain->iommufd_hwpt;
+ fault = hwpt->fault;
+
+ spin_lock(&fault->common.lock);
+ list_add_tail(&group->node, &fault->common.deliver);
+ spin_unlock(&fault->common.lock);
+
+ wake_up_interruptible(&fault->common.wait_queue);
+
+ return 0;
+}
+
+static const struct file_operations iommufd_veventq_fops =
+ INIT_EVENTQ_FOPS(iommufd_veventq_fops_read, NULL);
+
+int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_veventq_alloc *cmd = ucmd->cmd;
+ struct iommufd_veventq *veventq;
+ struct iommufd_viommu *viommu;
+ int fdno;
+ int rc;
+
+ if (cmd->flags || cmd->__reserved ||
+ cmd->type == IOMMU_VEVENTQ_TYPE_DEFAULT)
+ return -EOPNOTSUPP;
+ if (!cmd->veventq_depth)
+ return -EINVAL;
+
+ viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
+ if (IS_ERR(viommu))
+ return PTR_ERR(viommu);
+
+ down_write(&viommu->veventqs_rwsem);
+
+ if (iommufd_viommu_find_veventq(viommu, cmd->type)) {
+ rc = -EEXIST;
+ goto out_unlock_veventqs;
+ }
+
+ veventq = __iommufd_object_alloc(ucmd->ictx, veventq,
+ IOMMUFD_OBJ_VEVENTQ, common.obj);
+ if (IS_ERR(veventq)) {
+ rc = PTR_ERR(veventq);
+ goto out_unlock_veventqs;
+ }
+
+ veventq->type = cmd->type;
+ veventq->viommu = viommu;
+ refcount_inc(&viommu->obj.users);
+ veventq->depth = cmd->veventq_depth;
+ list_add_tail(&veventq->node, &viommu->veventqs);
+ veventq->lost_events_header.header.flags =
+ IOMMU_VEVENTQ_FLAG_LOST_EVENTS;
+
+ fdno = iommufd_eventq_init(&veventq->common, "[iommufd-viommu-event]",
+ ucmd->ictx, &iommufd_veventq_fops);
+ if (fdno < 0) {
+ rc = fdno;
+ goto out_abort;
+ }
+
+ cmd->out_veventq_id = veventq->common.obj.id;
+ cmd->out_veventq_fd = fdno;
+
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+ if (rc)
+ goto out_put_fdno;
+
+ iommufd_object_finalize(ucmd->ictx, &veventq->common.obj);
+ fd_install(fdno, veventq->common.filep);
+ goto out_unlock_veventqs;
+
+out_put_fdno:
+ put_unused_fd(fdno);
+ fput(veventq->common.filep);
+out_abort:
+ iommufd_object_abort_and_destroy(ucmd->ictx, &veventq->common.obj);
+out_unlock_veventqs:
+ up_write(&viommu->veventqs_rwsem);
+ iommufd_put_object(ucmd->ictx, &viommu->obj);
+ return rc;
+}
diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c
deleted file mode 100644
index c48d72c9668c..000000000000
--- a/drivers/iommu/iommufd/fault.c
+++ /dev/null
@@ -1,342 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2024 Intel Corporation
- */
-#define pr_fmt(fmt) "iommufd: " fmt
-
-#include <linux/anon_inodes.h>
-#include <linux/file.h>
-#include <linux/fs.h>
-#include <linux/iommufd.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/pci.h>
-#include <linux/pci-ats.h>
-#include <linux/poll.h>
-#include <uapi/linux/iommufd.h>
-
-#include "../iommu-priv.h"
-#include "iommufd_private.h"
-
-int iommufd_fault_iopf_enable(struct iommufd_device *idev)
-{
- struct device *dev = idev->dev;
- int ret;
-
- /*
- * Once we turn on PCI/PRI support for VF, the response failure code
- * should not be forwarded to the hardware due to PRI being a shared
- * resource between PF and VFs. There is no coordination for this
- * shared capability. This waits for a vPRI reset to recover.
- */
- if (dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
-
- if (pdev->is_virtfn && pci_pri_supported(pdev))
- return -EINVAL;
- }
-
- mutex_lock(&idev->iopf_lock);
- /* Device iopf has already been on. */
- if (++idev->iopf_enabled > 1) {
- mutex_unlock(&idev->iopf_lock);
- return 0;
- }
-
- ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
- if (ret)
- --idev->iopf_enabled;
- mutex_unlock(&idev->iopf_lock);
-
- return ret;
-}
-
-void iommufd_fault_iopf_disable(struct iommufd_device *idev)
-{
- mutex_lock(&idev->iopf_lock);
- if (!WARN_ON(idev->iopf_enabled == 0)) {
- if (--idev->iopf_enabled == 0)
- iommu_dev_disable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF);
- }
- mutex_unlock(&idev->iopf_lock);
-}
-
-void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_attach_handle *handle)
-{
- struct iommufd_fault *fault = hwpt->fault;
- struct iopf_group *group, *next;
- struct list_head free_list;
- unsigned long index;
-
- if (!fault)
- return;
- INIT_LIST_HEAD(&free_list);
-
- mutex_lock(&fault->mutex);
- spin_lock(&fault->lock);
- list_for_each_entry_safe(group, next, &fault->deliver, node) {
- if (group->attach_handle != &handle->handle)
- continue;
- list_move(&group->node, &free_list);
- }
- spin_unlock(&fault->lock);
-
- list_for_each_entry_safe(group, next, &free_list, node) {
- list_del(&group->node);
- iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
- iopf_free_group(group);
- }
-
- xa_for_each(&fault->response, index, group) {
- if (group->attach_handle != &handle->handle)
- continue;
- xa_erase(&fault->response, index);
- iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
- iopf_free_group(group);
- }
- mutex_unlock(&fault->mutex);
-}
-
-void iommufd_fault_destroy(struct iommufd_object *obj)
-{
- struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj);
- struct iopf_group *group, *next;
- unsigned long index;
-
- /*
- * The iommufd object's reference count is zero at this point.
- * We can be confident that no other threads are currently
- * accessing this pointer. Therefore, acquiring the mutex here
- * is unnecessary.
- */
- list_for_each_entry_safe(group, next, &fault->deliver, node) {
- list_del(&group->node);
- iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
- iopf_free_group(group);
- }
- xa_for_each(&fault->response, index, group) {
- xa_erase(&fault->response, index);
- iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
- iopf_free_group(group);
- }
- xa_destroy(&fault->response);
- mutex_destroy(&fault->mutex);
-}
-
-static void iommufd_compose_fault_message(struct iommu_fault *fault,
- struct iommu_hwpt_pgfault *hwpt_fault,
- struct iommufd_device *idev,
- u32 cookie)
-{
- hwpt_fault->flags = fault->prm.flags;
- hwpt_fault->dev_id = idev->obj.id;
- hwpt_fault->pasid = fault->prm.pasid;
- hwpt_fault->grpid = fault->prm.grpid;
- hwpt_fault->perm = fault->prm.perm;
- hwpt_fault->addr = fault->prm.addr;
- hwpt_fault->length = 0;
- hwpt_fault->cookie = cookie;
-}
-
-static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
- size_t count, loff_t *ppos)
-{
- size_t fault_size = sizeof(struct iommu_hwpt_pgfault);
- struct iommufd_fault *fault = filep->private_data;
- struct iommu_hwpt_pgfault data = {};
- struct iommufd_device *idev;
- struct iopf_group *group;
- struct iopf_fault *iopf;
- size_t done = 0;
- int rc = 0;
-
- if (*ppos || count % fault_size)
- return -ESPIPE;
-
- mutex_lock(&fault->mutex);
- while ((group = iommufd_fault_deliver_fetch(fault))) {
- if (done >= count ||
- group->fault_count * fault_size > count - done) {
- iommufd_fault_deliver_restore(fault, group);
- break;
- }
-
- rc = xa_alloc(&fault->response, &group->cookie, group,
- xa_limit_32b, GFP_KERNEL);
- if (rc) {
- iommufd_fault_deliver_restore(fault, group);
- break;
- }
-
- idev = to_iommufd_handle(group->attach_handle)->idev;
- list_for_each_entry(iopf, &group->faults, list) {
- iommufd_compose_fault_message(&iopf->fault,
- &data, idev,
- group->cookie);
- if (copy_to_user(buf + done, &data, fault_size)) {
- xa_erase(&fault->response, group->cookie);
- iommufd_fault_deliver_restore(fault, group);
- rc = -EFAULT;
- break;
- }
- done += fault_size;
- }
- }
- mutex_unlock(&fault->mutex);
-
- return done == 0 ? rc : done;
-}
-
-static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- size_t response_size = sizeof(struct iommu_hwpt_page_response);
- struct iommufd_fault *fault = filep->private_data;
- struct iommu_hwpt_page_response response;
- struct iopf_group *group;
- size_t done = 0;
- int rc = 0;
-
- if (*ppos || count % response_size)
- return -ESPIPE;
-
- mutex_lock(&fault->mutex);
- while (count > done) {
- rc = copy_from_user(&response, buf + done, response_size);
- if (rc)
- break;
-
- static_assert((int)IOMMUFD_PAGE_RESP_SUCCESS ==
- (int)IOMMU_PAGE_RESP_SUCCESS);
- static_assert((int)IOMMUFD_PAGE_RESP_INVALID ==
- (int)IOMMU_PAGE_RESP_INVALID);
- if (response.code != IOMMUFD_PAGE_RESP_SUCCESS &&
- response.code != IOMMUFD_PAGE_RESP_INVALID) {
- rc = -EINVAL;
- break;
- }
-
- group = xa_erase(&fault->response, response.cookie);
- if (!group) {
- rc = -EINVAL;
- break;
- }
-
- iopf_group_response(group, response.code);
- iopf_free_group(group);
- done += response_size;
- }
- mutex_unlock(&fault->mutex);
-
- return done == 0 ? rc : done;
-}
-
-static __poll_t iommufd_fault_fops_poll(struct file *filep,
- struct poll_table_struct *wait)
-{
- struct iommufd_fault *fault = filep->private_data;
- __poll_t pollflags = EPOLLOUT;
-
- poll_wait(filep, &fault->wait_queue, wait);
- spin_lock(&fault->lock);
- if (!list_empty(&fault->deliver))
- pollflags |= EPOLLIN | EPOLLRDNORM;
- spin_unlock(&fault->lock);
-
- return pollflags;
-}
-
-static int iommufd_fault_fops_release(struct inode *inode, struct file *filep)
-{
- struct iommufd_fault *fault = filep->private_data;
-
- refcount_dec(&fault->obj.users);
- iommufd_ctx_put(fault->ictx);
- return 0;
-}
-
-static const struct file_operations iommufd_fault_fops = {
- .owner = THIS_MODULE,
- .open = nonseekable_open,
- .read = iommufd_fault_fops_read,
- .write = iommufd_fault_fops_write,
- .poll = iommufd_fault_fops_poll,
- .release = iommufd_fault_fops_release,
-};
-
-int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
-{
- struct iommu_fault_alloc *cmd = ucmd->cmd;
- struct iommufd_fault *fault;
- struct file *filep;
- int fdno;
- int rc;
-
- if (cmd->flags)
- return -EOPNOTSUPP;
-
- fault = iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT);
- if (IS_ERR(fault))
- return PTR_ERR(fault);
-
- fault->ictx = ucmd->ictx;
- INIT_LIST_HEAD(&fault->deliver);
- xa_init_flags(&fault->response, XA_FLAGS_ALLOC1);
- mutex_init(&fault->mutex);
- spin_lock_init(&fault->lock);
- init_waitqueue_head(&fault->wait_queue);
-
- filep = anon_inode_getfile("[iommufd-pgfault]", &iommufd_fault_fops,
- fault, O_RDWR);
- if (IS_ERR(filep)) {
- rc = PTR_ERR(filep);
- goto out_abort;
- }
-
- refcount_inc(&fault->obj.users);
- iommufd_ctx_get(fault->ictx);
- fault->filep = filep;
-
- fdno = get_unused_fd_flags(O_CLOEXEC);
- if (fdno < 0) {
- rc = fdno;
- goto out_fput;
- }
-
- cmd->out_fault_id = fault->obj.id;
- cmd->out_fault_fd = fdno;
-
- rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
- if (rc)
- goto out_put_fdno;
- iommufd_object_finalize(ucmd->ictx, &fault->obj);
-
- fd_install(fdno, fault->filep);
-
- return 0;
-out_put_fdno:
- put_unused_fd(fdno);
-out_fput:
- fput(filep);
-out_abort:
- iommufd_object_abort_and_destroy(ucmd->ictx, &fault->obj);
-
- return rc;
-}
-
-int iommufd_fault_iopf_handler(struct iopf_group *group)
-{
- struct iommufd_hw_pagetable *hwpt;
- struct iommufd_fault *fault;
-
- hwpt = group->attach_handle->domain->iommufd_hwpt;
- fault = hwpt->fault;
-
- spin_lock(&fault->lock);
- list_add_tail(&group->node, &fault->deliver);
- spin_unlock(&fault->lock);
-
- wake_up_interruptible(&fault->wait_queue);
-
- return 0;
-}
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index 7de6e914232e..487779470261 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -14,7 +14,7 @@ static void __iommufd_hwpt_destroy(struct iommufd_hw_pagetable *hwpt)
iommu_domain_free(hwpt->domain);
if (hwpt->fault)
- refcount_dec(&hwpt->fault->obj.users);
+ refcount_dec(&hwpt->fault->common.obj.users);
}
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj)
@@ -90,6 +90,7 @@ iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging)
* @ictx: iommufd context
* @ioas: IOAS to associate the domain with
* @idev: Device to get an iommu_domain for
+ * @pasid: PASID to get an iommu_domain for
* @flags: Flags from userspace
* @immediate_attach: True if idev should be attached to the hwpt
* @user_data: The user provided driver specific data describing the domain to
@@ -105,13 +106,14 @@ iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging)
*/
struct iommufd_hwpt_paging *
iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
- struct iommufd_device *idev, u32 flags,
- bool immediate_attach,
+ struct iommufd_device *idev, ioasid_t pasid,
+ u32 flags, bool immediate_attach,
const struct iommu_user_data *user_data)
{
const u32 valid_flags = IOMMU_HWPT_ALLOC_NEST_PARENT |
IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
- IOMMU_HWPT_FAULT_ID_VALID;
+ IOMMU_HWPT_FAULT_ID_VALID |
+ IOMMU_HWPT_ALLOC_PASID;
const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
struct iommufd_hwpt_paging *hwpt_paging;
struct iommufd_hw_pagetable *hwpt;
@@ -126,12 +128,16 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
if ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) &&
!device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
return ERR_PTR(-EOPNOTSUPP);
+ if ((flags & IOMMU_HWPT_FAULT_ID_VALID) &&
+ (flags & IOMMU_HWPT_ALLOC_NEST_PARENT))
+ return ERR_PTR(-EOPNOTSUPP);
hwpt_paging = __iommufd_object_alloc(
ictx, hwpt_paging, IOMMUFD_OBJ_HWPT_PAGING, common.obj);
if (IS_ERR(hwpt_paging))
return ERR_CAST(hwpt_paging);
hwpt = &hwpt_paging->common;
+ hwpt->pasid_compat = flags & IOMMU_HWPT_ALLOC_PASID;
INIT_LIST_HEAD(&hwpt_paging->hwpt_item);
/* Pairs with iommufd_hw_pagetable_destroy() */
@@ -156,7 +162,8 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
goto out_abort;
}
}
- iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi);
+ hwpt->domain->iommufd_hwpt = hwpt;
+ hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD;
/*
* Set the coherency mode before we do iopt_table_add_domain() as some
@@ -185,7 +192,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
* sequence. Once those drivers are fixed this should be removed.
*/
if (immediate_attach) {
- rc = iommufd_hw_pagetable_attach(hwpt, idev);
+ rc = iommufd_hw_pagetable_attach(hwpt, idev, pasid);
if (rc)
goto out_abort;
}
@@ -198,7 +205,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
out_detach:
if (immediate_attach)
- iommufd_hw_pagetable_detach(idev);
+ iommufd_hw_pagetable_detach(idev, pasid);
out_abort:
iommufd_object_abort_and_destroy(ictx, &hwpt->obj);
return ERR_PTR(rc);
@@ -227,7 +234,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
struct iommufd_hw_pagetable *hwpt;
int rc;
- if ((flags & ~IOMMU_HWPT_FAULT_ID_VALID) ||
+ if ((flags & ~(IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID)) ||
!user_data->len || !ops->domain_alloc_nested)
return ERR_PTR(-EOPNOTSUPP);
if (parent->auto_domain || !parent->nest_parent ||
@@ -239,6 +246,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
if (IS_ERR(hwpt_nested))
return ERR_CAST(hwpt_nested);
hwpt = &hwpt_nested->common;
+ hwpt->pasid_compat = flags & IOMMU_HWPT_ALLOC_PASID;
refcount_inc(&parent->common.obj.users);
hwpt_nested->parent = parent;
@@ -252,7 +260,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
goto out_abort;
}
hwpt->domain->owner = ops;
- iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi);
+ hwpt->domain->iommufd_hwpt = hwpt;
+ hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD;
if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) {
rc = -EINVAL;
@@ -282,7 +291,7 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags,
struct iommufd_hw_pagetable *hwpt;
int rc;
- if (flags & ~IOMMU_HWPT_FAULT_ID_VALID)
+ if (flags & ~(IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID))
return ERR_PTR(-EOPNOTSUPP);
if (!user_data->len)
return ERR_PTR(-EOPNOTSUPP);
@@ -294,6 +303,7 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags,
if (IS_ERR(hwpt_nested))
return ERR_CAST(hwpt_nested);
hwpt = &hwpt_nested->common;
+ hwpt->pasid_compat = flags & IOMMU_HWPT_ALLOC_PASID;
hwpt_nested->viommu = viommu;
refcount_inc(&viommu->obj.users);
@@ -308,8 +318,9 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags,
hwpt->domain = NULL;
goto out_abort;
}
+ hwpt->domain->iommufd_hwpt = hwpt;
hwpt->domain->owner = viommu->iommu_dev->ops;
- iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi);
+ hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD;
if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) {
rc = -EINVAL;
@@ -358,8 +369,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
ioas = container_of(pt_obj, struct iommufd_ioas, obj);
mutex_lock(&ioas->mutex);
hwpt_paging = iommufd_hwpt_paging_alloc(
- ucmd->ictx, ioas, idev, cmd->flags, false,
- user_data.len ? &user_data : NULL);
+ ucmd->ictx, ioas, idev, IOMMU_NO_PASID, cmd->flags,
+ false, user_data.len ? &user_data : NULL);
if (IS_ERR(hwpt_paging)) {
rc = PTR_ERR(hwpt_paging);
goto out_unlock;
@@ -409,10 +420,9 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
}
hwpt->fault = fault;
hwpt->domain->iopf_handler = iommufd_fault_iopf_handler;
- refcount_inc(&fault->obj.users);
- iommufd_put_object(ucmd->ictx, &fault->obj);
+ refcount_inc(&fault->common.obj.users);
+ iommufd_put_object(ucmd->ictx, &fault->common.obj);
}
- hwpt->domain->iommufd_hwpt = hwpt;
cmd->out_hwpt_id = hwpt->obj.id;
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 246297452a44..80e8c76d25f2 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -32,8 +32,11 @@ struct iommufd_sw_msi_maps {
DECLARE_BITMAP(bitmap, 64);
};
-int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
- phys_addr_t msi_addr);
+#ifdef CONFIG_IRQ_MSI_IOMMU
+int iommufd_sw_msi_install(struct iommufd_ctx *ictx,
+ struct iommufd_hwpt_paging *hwpt_paging,
+ struct iommufd_sw_msi_map *msi_map);
+#endif
struct iommufd_ctx {
struct file *file;
@@ -296,6 +299,7 @@ struct iommufd_hw_pagetable {
struct iommufd_object obj;
struct iommu_domain *domain;
struct iommufd_fault *fault;
+ bool pasid_compat : 1;
};
struct iommufd_hwpt_paging {
@@ -366,13 +370,13 @@ int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd);
struct iommufd_hwpt_paging *
iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
- struct iommufd_device *idev, u32 flags,
- bool immediate_attach,
+ struct iommufd_device *idev, ioasid_t pasid,
+ u32 flags, bool immediate_attach,
const struct iommu_user_data *user_data);
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev);
+ struct iommufd_device *idev, ioasid_t pasid);
struct iommufd_hw_pagetable *
-iommufd_hw_pagetable_detach(struct iommufd_device *idev);
+iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid);
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj);
void iommufd_hwpt_paging_abort(struct iommufd_object *obj);
void iommufd_hwpt_nested_destroy(struct iommufd_object *obj);
@@ -396,13 +400,14 @@ static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
refcount_dec(&hwpt->obj.users);
}
+struct iommufd_attach;
+
struct iommufd_group {
struct kref ref;
struct mutex lock;
struct iommufd_ctx *ictx;
struct iommu_group *group;
- struct iommufd_hw_pagetable *hwpt;
- struct list_head device_list;
+ struct xarray pasid_attach;
struct iommufd_sw_msi_maps required_sw_msi;
phys_addr_t sw_msi_start;
};
@@ -454,49 +459,17 @@ void iopt_remove_access(struct io_pagetable *iopt,
u32 iopt_access_list_id);
void iommufd_access_destroy_object(struct iommufd_object *obj);
-/*
- * An iommufd_fault object represents an interface to deliver I/O page faults
- * to the user space. These objects are created/destroyed by the user space and
- * associated with hardware page table objects during page-table allocation.
- */
-struct iommufd_fault {
+struct iommufd_eventq {
struct iommufd_object obj;
struct iommufd_ctx *ictx;
struct file *filep;
spinlock_t lock; /* protects the deliver list */
struct list_head deliver;
- struct mutex mutex; /* serializes response flows */
- struct xarray response;
struct wait_queue_head wait_queue;
};
-/* Fetch the first node out of the fault->deliver list */
-static inline struct iopf_group *
-iommufd_fault_deliver_fetch(struct iommufd_fault *fault)
-{
- struct list_head *list = &fault->deliver;
- struct iopf_group *group = NULL;
-
- spin_lock(&fault->lock);
- if (!list_empty(list)) {
- group = list_first_entry(list, struct iopf_group, node);
- list_del(&group->node);
- }
- spin_unlock(&fault->lock);
- return group;
-}
-
-/* Restore a node back to the head of the fault->deliver list */
-static inline void iommufd_fault_deliver_restore(struct iommufd_fault *fault,
- struct iopf_group *group)
-{
- spin_lock(&fault->lock);
- list_add(&group->node, &fault->deliver);
- spin_unlock(&fault->lock);
-}
-
struct iommufd_attach_handle {
struct iommu_attach_handle handle;
struct iommufd_device *idev;
@@ -505,12 +478,29 @@ struct iommufd_attach_handle {
/* Convert an iommu attach handle to iommufd handle. */
#define to_iommufd_handle(hdl) container_of(hdl, struct iommufd_attach_handle, handle)
+/*
+ * An iommufd_fault object represents an interface to deliver I/O page faults
+ * to the user space. These objects are created/destroyed by the user space and
+ * associated with hardware page table objects during page-table allocation.
+ */
+struct iommufd_fault {
+ struct iommufd_eventq common;
+ struct mutex mutex; /* serializes response flows */
+ struct xarray response;
+};
+
+static inline struct iommufd_fault *
+eventq_to_fault(struct iommufd_eventq *eventq)
+{
+ return container_of(eventq, struct iommufd_fault, common);
+}
+
static inline struct iommufd_fault *
iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id)
{
return container_of(iommufd_get_object(ucmd->ictx, id,
IOMMUFD_OBJ_FAULT),
- struct iommufd_fault, obj);
+ struct iommufd_fault, common.obj);
}
int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
@@ -522,6 +512,74 @@ void iommufd_fault_iopf_disable(struct iommufd_device *idev);
void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
struct iommufd_attach_handle *handle);
+/* An iommufd_vevent represents a vIOMMU event in an iommufd_veventq */
+struct iommufd_vevent {
+ struct iommufd_vevent_header header;
+ struct list_head node; /* for iommufd_eventq::deliver */
+ ssize_t data_len;
+ u64 event_data[] __counted_by(data_len);
+};
+
+/*
+ * Evaluates to non-zero when @vevent carries IOMMU_VEVENTQ_FLAG_LOST_EVENTS in
+ * its header flags. @vevent is parenthesized so that any pointer expression
+ * (e.g. "&veventq->lost_events_header" or "p + 1") expands correctly.
+ */
+#define vevent_for_lost_events_header(vevent) \
+	((vevent)->header.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS)
+
+/*
+ * An iommufd_veventq object represents an interface to deliver vIOMMU events to
+ * the user space. It is created/destroyed by the user space and associated with
+ * a vIOMMU object during the allocations.
+ */
+struct iommufd_veventq {
+ struct iommufd_eventq common;
+ struct iommufd_viommu *viommu;
+ struct list_head node; /* for iommufd_viommu::veventqs */
+ struct iommufd_vevent lost_events_header;
+
+ unsigned int type;
+ unsigned int depth;
+
+ /* Use common.lock for protection */
+ u32 num_events;
+ u32 sequence;
+};
+
+static inline struct iommufd_veventq *
+eventq_to_veventq(struct iommufd_eventq *eventq)
+{
+ return container_of(eventq, struct iommufd_veventq, common);
+}
+
+static inline struct iommufd_veventq *
+iommufd_get_veventq(struct iommufd_ucmd *ucmd, u32 id)
+{
+ return container_of(iommufd_get_object(ucmd->ictx, id,
+ IOMMUFD_OBJ_VEVENTQ),
+ struct iommufd_veventq, common.obj);
+}
+
+int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd);
+void iommufd_veventq_destroy(struct iommufd_object *obj);
+void iommufd_veventq_abort(struct iommufd_object *obj);
+
+static inline void iommufd_vevent_handler(struct iommufd_veventq *veventq,
+ struct iommufd_vevent *vevent)
+{
+ struct iommufd_eventq *eventq = &veventq->common;
+
+ lockdep_assert_held(&eventq->lock);
+
+ /*
+ * Remove the lost_events_header and add the new node at the same time.
+ * Note the new node can be lost_events_header, for a sequence update.
+ */
+ if (list_is_last(&veventq->lost_events_header.node, &eventq->deliver))
+ list_del(&veventq->lost_events_header.node);
+ list_add_tail(&vevent->node, &eventq->deliver);
+ vevent->header.sequence = veventq->sequence;
+ veventq->sequence = (veventq->sequence + 1) & INT_MAX;
+
+ wake_up_interruptible(&eventq->wait_queue);
+}
+
static inline struct iommufd_viommu *
iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
{
@@ -530,6 +588,20 @@ iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
struct iommufd_viommu, obj);
}
+/*
+ * Walk @viommu's veventqs list and return the first vEVENTQ whose type equals
+ * @type, or NULL when no entry matches. The caller must hold
+ * viommu->veventqs_rwsem (checked with lockdep).
+ */
+static inline struct iommufd_veventq *
+iommufd_viommu_find_veventq(struct iommufd_viommu *viommu, u32 type)
+{
+	struct iommufd_veventq *veventq;
+
+	lockdep_assert_held(&viommu->veventqs_rwsem);
+
+	/* Nothing is unlinked during the walk, so the plain iterator suffices */
+	list_for_each_entry(veventq, &viommu->veventqs, node) {
+		if (veventq->type == type)
+			return veventq;
+	}
+	return NULL;
+}
+
int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_viommu_destroy(struct iommufd_object *obj);
int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd);
diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h
index a6b7a163f636..1cd7e8394129 100644
--- a/drivers/iommu/iommufd/iommufd_test.h
+++ b/drivers/iommu/iommufd/iommufd_test.h
@@ -24,6 +24,11 @@ enum {
IOMMU_TEST_OP_MD_CHECK_IOTLB,
IOMMU_TEST_OP_TRIGGER_IOPF,
IOMMU_TEST_OP_DEV_CHECK_CACHE,
+ IOMMU_TEST_OP_TRIGGER_VEVENT,
+ IOMMU_TEST_OP_PASID_ATTACH,
+ IOMMU_TEST_OP_PASID_REPLACE,
+ IOMMU_TEST_OP_PASID_DETACH,
+ IOMMU_TEST_OP_PASID_CHECK_HWPT,
};
enum {
@@ -48,6 +53,7 @@ enum {
enum {
MOCK_FLAGS_DEVICE_NO_DIRTY = 1 << 0,
MOCK_FLAGS_DEVICE_HUGE_IOVA = 1 << 1,
+ MOCK_FLAGS_DEVICE_PASID = 1 << 2,
};
enum {
@@ -60,6 +66,9 @@ enum {
MOCK_DEV_CACHE_NUM = 4,
};
+/* Reserved for special pasid replace test */
+#define IOMMU_TEST_PASID_RESERVED 1024
+
struct iommu_test_cmd {
__u32 size;
__u32 op;
@@ -145,11 +154,36 @@ struct iommu_test_cmd {
__u32 id;
__u32 cache;
} check_dev_cache;
+ struct {
+ __u32 dev_id;
+ } trigger_vevent;
+ struct {
+ __u32 pasid;
+ __u32 pt_id;
+ /* @id is stdev_id */
+ } pasid_attach;
+ struct {
+ __u32 pasid;
+ __u32 pt_id;
+ /* @id is stdev_id */
+ } pasid_replace;
+ struct {
+ __u32 pasid;
+ /* @id is stdev_id */
+ } pasid_detach;
+ struct {
+ __u32 pasid;
+ __u32 hwpt_id;
+ /* @id is stdev_id */
+ } pasid_check;
};
__u32 last;
};
#define IOMMU_TEST_CMD _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE + 32)
+/* Mock device/iommu PASID width */
+#define MOCK_PASID_WIDTH 20
+
/* Mock structs for IOMMU_DEVICE_GET_HW_INFO ioctl */
#define IOMMU_HW_INFO_TYPE_SELFTEST 0xfeedbeef
#define IOMMU_HW_INFO_SELFTEST_REGVAL 0xdeadbeef
@@ -212,4 +246,10 @@ struct iommu_viommu_invalidate_selftest {
__u32 cache_id;
};
+#define IOMMU_VEVENTQ_TYPE_SELFTEST 0xbeefbeef
+
+struct iommu_viommu_event_selftest {
+ __u32 virt_id;
+};
+
#endif
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index b6fa9fd11bc1..3df468f64e7d 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -317,6 +317,7 @@ union ucmd_buffer {
struct iommu_ioas_unmap unmap;
struct iommu_option option;
struct iommu_vdevice_alloc vdev;
+ struct iommu_veventq_alloc veventq;
struct iommu_vfio_ioas vfio_ioas;
struct iommu_viommu_alloc viommu;
#ifdef CONFIG_IOMMUFD_TEST
@@ -372,6 +373,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, val64),
IOCTL_OP(IOMMU_VDEVICE_ALLOC, iommufd_vdevice_alloc_ioctl,
struct iommu_vdevice_alloc, virt_id),
+ IOCTL_OP(IOMMU_VEVENTQ_ALLOC, iommufd_veventq_alloc,
+ struct iommu_veventq_alloc, out_veventq_fd),
IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas,
__reserved),
IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl,
@@ -514,6 +517,10 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
[IOMMUFD_OBJ_VDEVICE] = {
.destroy = iommufd_vdevice_destroy,
},
+ [IOMMUFD_OBJ_VEVENTQ] = {
+ .destroy = iommufd_veventq_destroy,
+ .abort = iommufd_veventq_abort,
+ },
[IOMMUFD_OBJ_VIOMMU] = {
.destroy = iommufd_viommu_destroy,
},
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index d40deb0a4f06..18d9a216eb30 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -161,9 +161,13 @@ enum selftest_obj_type {
struct mock_dev {
struct device dev;
+ struct mock_viommu *viommu;
+ struct rw_semaphore viommu_rwsem;
unsigned long flags;
+ unsigned long vdev_id;
int id;
u32 cache[MOCK_DEV_CACHE_NUM];
+ atomic_t pasid_1024_fake_error;
};
static inline struct mock_dev *to_mock_dev(struct device *dev)
@@ -193,15 +197,71 @@ static int mock_domain_nop_attach(struct iommu_domain *domain,
struct device *dev)
{
struct mock_dev *mdev = to_mock_dev(dev);
+ struct mock_viommu *new_viommu = NULL;
+ unsigned long vdev_id = 0;
+ int rc;
if (domain->dirty_ops && (mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY))
return -EINVAL;
+ iommu_group_mutex_assert(dev);
+ if (domain->type == IOMMU_DOMAIN_NESTED) {
+ new_viommu = to_mock_nested(domain)->mock_viommu;
+ if (new_viommu) {
+ rc = iommufd_viommu_get_vdev_id(&new_viommu->core, dev,
+ &vdev_id);
+ if (rc)
+ return rc;
+ }
+ }
+ if (new_viommu != mdev->viommu) {
+ down_write(&mdev->viommu_rwsem);
+ mdev->viommu = new_viommu;
+ mdev->vdev_id = vdev_id;
+ up_write(&mdev->viommu_rwsem);
+ }
+
+ return 0;
+}
+
+/*
+ * Mock set_dev_pasid op. It succeeds for every pasid except
+ * IOMMU_TEST_PASID_RESERVED, for which it alternates between success and a
+ * faked -ENOMEM so the selftests can exercise the failure/rollback path.
+ *
+ * Returns 0 on success, or -ENOMEM for the faked failure.
+ */
+static int mock_domain_set_dev_pasid_nop(struct iommu_domain *domain,
+					 struct device *dev, ioasid_t pasid,
+					 struct iommu_domain *old)
+{
+	struct mock_dev *mdev = to_mock_dev(dev);
+
+	/*
+	 * On the first attach with pasid IOMMU_TEST_PASID_RESERVED, set
+	 * mdev->pasid_1024_fake_error so that the second call of this op
+	 * fakes an error. This validates the error path of the core, i.e. the
+	 * case in which the iommu core needs to roll back to the old domain
+	 * due to a driver failure, e.g. on replace. Note that the third call
+	 * of this op succeeds again, since mdev->pasid_1024_fake_error is
+	 * cleared by the second call.
+	 */
+	if (pasid == IOMMU_TEST_PASID_RESERVED) {
+		if (domain->type == IOMMU_DOMAIN_BLOCKED) {
+			/* Attaching the blocked domain resets the fake-error state */
+			atomic_set(&mdev->pasid_1024_fake_error, 0);
+		} else if (atomic_read(&mdev->pasid_1024_fake_error)) {
+			/*
+			 * Clear the flag, and fake an error to fail the
+			 * replacement.
+			 */
+			atomic_set(&mdev->pasid_1024_fake_error, 0);
+			return -ENOMEM;
+		} else {
+			/* Set the flag to fake an error in next call */
+			atomic_set(&mdev->pasid_1024_fake_error, 1);
+		}
+	}
+
 	return 0;
 }
static const struct iommu_domain_ops mock_blocking_ops = {
.attach_dev = mock_domain_nop_attach,
+ .set_dev_pasid = mock_domain_set_dev_pasid_nop
};
static struct iommu_domain mock_blocking_domain = {
@@ -343,7 +403,7 @@ mock_domain_alloc_nested(struct device *dev, struct iommu_domain *parent,
struct mock_iommu_domain_nested *mock_nested;
struct mock_iommu_domain *mock_parent;
- if (flags)
+ if (flags & ~IOMMU_HWPT_ALLOC_PASID)
return ERR_PTR(-EOPNOTSUPP);
if (!parent || parent->ops != mock_ops.default_domain_ops)
return ERR_PTR(-EINVAL);
@@ -365,7 +425,8 @@ mock_domain_alloc_paging_flags(struct device *dev, u32 flags,
{
bool has_dirty_flag = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
- IOMMU_HWPT_ALLOC_NEST_PARENT;
+ IOMMU_HWPT_ALLOC_NEST_PARENT |
+ IOMMU_HWPT_ALLOC_PASID;
struct mock_dev *mdev = to_mock_dev(dev);
bool no_dirty_ops = mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY;
struct mock_iommu_domain *mock;
@@ -585,7 +646,7 @@ mock_viommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
struct mock_viommu *mock_viommu = to_mock_viommu(viommu);
struct mock_iommu_domain_nested *mock_nested;
- if (flags)
+ if (flags & ~IOMMU_HWPT_ALLOC_PASID)
return ERR_PTR(-EOPNOTSUPP);
mock_nested = __mock_domain_alloc_nested(user_data);
@@ -720,6 +781,7 @@ static const struct iommu_ops mock_ops = {
.map_pages = mock_domain_map_pages,
.unmap_pages = mock_domain_unmap_pages,
.iova_to_phys = mock_domain_iova_to_phys,
+ .set_dev_pasid = mock_domain_set_dev_pasid_nop,
},
};
@@ -780,6 +842,7 @@ static struct iommu_domain_ops domain_nested_ops = {
.free = mock_domain_free_nested,
.attach_dev = mock_domain_nop_attach,
.cache_invalidate_user = mock_domain_cache_invalidate_user,
+ .set_dev_pasid = mock_domain_set_dev_pasid_nop,
};
static inline struct iommufd_hw_pagetable *
@@ -839,17 +902,24 @@ static void mock_dev_release(struct device *dev)
static struct mock_dev *mock_dev_create(unsigned long dev_flags)
{
+ struct property_entry prop[] = {
+ PROPERTY_ENTRY_U32("pasid-num-bits", 0),
+ {},
+ };
+ const u32 valid_flags = MOCK_FLAGS_DEVICE_NO_DIRTY |
+ MOCK_FLAGS_DEVICE_HUGE_IOVA |
+ MOCK_FLAGS_DEVICE_PASID;
struct mock_dev *mdev;
int rc, i;
- if (dev_flags &
- ~(MOCK_FLAGS_DEVICE_NO_DIRTY | MOCK_FLAGS_DEVICE_HUGE_IOVA))
+ if (dev_flags & ~valid_flags)
return ERR_PTR(-EINVAL);
mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
if (!mdev)
return ERR_PTR(-ENOMEM);
+ init_rwsem(&mdev->viommu_rwsem);
device_initialize(&mdev->dev);
mdev->flags = dev_flags;
mdev->dev.release = mock_dev_release;
@@ -866,6 +936,15 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags)
if (rc)
goto err_put;
+ if (dev_flags & MOCK_FLAGS_DEVICE_PASID)
+ prop[0] = PROPERTY_ENTRY_U32("pasid-num-bits", MOCK_PASID_WIDTH);
+
+ rc = device_create_managed_software_node(&mdev->dev, prop, NULL);
+ if (rc) {
+ dev_err(&mdev->dev, "add pasid-num-bits property failed, rc: %d", rc);
+ goto err_put;
+ }
+
rc = device_add(&mdev->dev);
if (rc)
goto err_put;
@@ -921,7 +1000,7 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd,
}
sobj->idev.idev = idev;
- rc = iommufd_device_attach(idev, &pt_id);
+ rc = iommufd_device_attach(idev, IOMMU_NO_PASID, &pt_id);
if (rc)
goto out_unbind;
@@ -936,7 +1015,7 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd,
return 0;
out_detach:
- iommufd_device_detach(idev);
+ iommufd_device_detach(idev, IOMMU_NO_PASID);
out_unbind:
iommufd_device_unbind(idev);
out_mdev:
@@ -946,39 +1025,49 @@ out_sobj:
return rc;
}
-/* Replace the mock domain with a manually allocated hw_pagetable */
-static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd,
- unsigned int device_id, u32 pt_id,
- struct iommu_test_cmd *cmd)
+static struct selftest_obj *
+iommufd_test_get_selftest_obj(struct iommufd_ctx *ictx, u32 id)
{
struct iommufd_object *dev_obj;
struct selftest_obj *sobj;
- int rc;
/*
* Prefer to use the OBJ_SELFTEST because the destroy_rwsem will ensure
* it doesn't race with detach, which is not allowed.
*/
- dev_obj =
- iommufd_get_object(ucmd->ictx, device_id, IOMMUFD_OBJ_SELFTEST);
+ dev_obj = iommufd_get_object(ictx, id, IOMMUFD_OBJ_SELFTEST);
if (IS_ERR(dev_obj))
- return PTR_ERR(dev_obj);
+ return ERR_CAST(dev_obj);
sobj = to_selftest_obj(dev_obj);
if (sobj->type != TYPE_IDEV) {
- rc = -EINVAL;
- goto out_dev_obj;
+ iommufd_put_object(ictx, dev_obj);
+ return ERR_PTR(-EINVAL);
}
+ return sobj;
+}
- rc = iommufd_device_replace(sobj->idev.idev, &pt_id);
+/* Replace the mock domain with a manually allocated hw_pagetable */
+static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd,
+ unsigned int device_id, u32 pt_id,
+ struct iommu_test_cmd *cmd)
+{
+ struct selftest_obj *sobj;
+ int rc;
+
+ sobj = iommufd_test_get_selftest_obj(ucmd->ictx, device_id);
+ if (IS_ERR(sobj))
+ return PTR_ERR(sobj);
+
+ rc = iommufd_device_replace(sobj->idev.idev, IOMMU_NO_PASID, &pt_id);
if (rc)
- goto out_dev_obj;
+ goto out_sobj;
cmd->mock_domain_replace.pt_id = pt_id;
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
-out_dev_obj:
- iommufd_put_object(ucmd->ictx, dev_obj);
+out_sobj:
+ iommufd_put_object(ucmd->ictx, &sobj->obj);
return rc;
}
@@ -1597,13 +1686,166 @@ static int iommufd_test_trigger_iopf(struct iommufd_ucmd *ucmd,
return 0;
}
+static int iommufd_test_trigger_vevent(struct iommufd_ucmd *ucmd,
+ struct iommu_test_cmd *cmd)
+{
+ struct iommu_viommu_event_selftest test = {};
+ struct iommufd_device *idev;
+ struct mock_dev *mdev;
+ int rc = -ENOENT;
+
+ idev = iommufd_get_device(ucmd, cmd->trigger_vevent.dev_id);
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
+ mdev = to_mock_dev(idev->dev);
+
+ down_read(&mdev->viommu_rwsem);
+ if (!mdev->viommu || !mdev->vdev_id)
+ goto out_unlock;
+
+ test.virt_id = mdev->vdev_id;
+ rc = iommufd_viommu_report_event(&mdev->viommu->core,
+ IOMMU_VEVENTQ_TYPE_SELFTEST, &test,
+ sizeof(test));
+out_unlock:
+ up_read(&mdev->viommu_rwsem);
+ iommufd_put_object(ucmd->ictx, &idev->obj);
+
+ return rc;
+}
+
+static inline struct iommufd_hw_pagetable *
+iommufd_get_hwpt(struct iommufd_ucmd *ucmd, u32 id)
+{
+ struct iommufd_object *pt_obj;
+
+ pt_obj = iommufd_get_object(ucmd->ictx, id, IOMMUFD_OBJ_ANY);
+ if (IS_ERR(pt_obj))
+ return ERR_CAST(pt_obj);
+
+ if (pt_obj->type != IOMMUFD_OBJ_HWPT_NESTED &&
+ pt_obj->type != IOMMUFD_OBJ_HWPT_PAGING) {
+ iommufd_put_object(ucmd->ictx, pt_obj);
+ return ERR_PTR(-EINVAL);
+ }
+
+ return container_of(pt_obj, struct iommufd_hw_pagetable, obj);
+}
+
+static int iommufd_test_pasid_check_hwpt(struct iommufd_ucmd *ucmd,
+ struct iommu_test_cmd *cmd)
+{
+ u32 hwpt_id = cmd->pasid_check.hwpt_id;
+ struct iommu_domain *attached_domain;
+ struct iommu_attach_handle *handle;
+ struct iommufd_hw_pagetable *hwpt;
+ struct selftest_obj *sobj;
+ struct mock_dev *mdev;
+ int rc = 0;
+
+ sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id);
+ if (IS_ERR(sobj))
+ return PTR_ERR(sobj);
+
+ mdev = sobj->idev.mock_dev;
+
+ handle = iommu_attach_handle_get(mdev->dev.iommu_group,
+ cmd->pasid_check.pasid, 0);
+ if (IS_ERR(handle))
+ attached_domain = NULL;
+ else
+ attached_domain = handle->domain;
+
+ /* hwpt_id == 0 means to check if pasid is detached */
+ if (!hwpt_id) {
+ if (attached_domain)
+ rc = -EINVAL;
+ goto out_sobj;
+ }
+
+ hwpt = iommufd_get_hwpt(ucmd, hwpt_id);
+ if (IS_ERR(hwpt)) {
+ rc = PTR_ERR(hwpt);
+ goto out_sobj;
+ }
+
+ if (attached_domain != hwpt->domain)
+ rc = -EINVAL;
+
+ iommufd_put_object(ucmd->ictx, &hwpt->obj);
+out_sobj:
+ iommufd_put_object(ucmd->ictx, &sobj->obj);
+ return rc;
+}
+
+static int iommufd_test_pasid_attach(struct iommufd_ucmd *ucmd,
+ struct iommu_test_cmd *cmd)
+{
+ struct selftest_obj *sobj;
+ int rc;
+
+ sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id);
+ if (IS_ERR(sobj))
+ return PTR_ERR(sobj);
+
+ rc = iommufd_device_attach(sobj->idev.idev, cmd->pasid_attach.pasid,
+ &cmd->pasid_attach.pt_id);
+ if (rc)
+ goto out_sobj;
+
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+ if (rc)
+ iommufd_device_detach(sobj->idev.idev,
+ cmd->pasid_attach.pasid);
+
+out_sobj:
+ iommufd_put_object(ucmd->ictx, &sobj->obj);
+ return rc;
+}
+
+/*
+ * IOMMU_TEST_OP_PASID_REPLACE handler: look up the selftest device object
+ * named by cmd->id and call iommufd_device_replace() for
+ * cmd->pasid_replace.pasid with cmd->pasid_replace.pt_id, then copy @cmd back
+ * to userspace.
+ *
+ * Returns 0 on success or a negative errno from the lookup, the replace or
+ * the response copy. The object reference taken by the lookup is always
+ * dropped before returning.
+ */
+static int iommufd_test_pasid_replace(struct iommufd_ucmd *ucmd,
+				      struct iommu_test_cmd *cmd)
+{
+	struct selftest_obj *sobj;
+	int rc;
+
+	sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id);
+	if (IS_ERR(sobj))
+		return PTR_ERR(sobj);
+
+	/* Use this op's own union member rather than aliasing pasid_attach */
+	rc = iommufd_device_replace(sobj->idev.idev, cmd->pasid_replace.pasid,
+				    &cmd->pasid_replace.pt_id);
+	if (rc)
+		goto out_sobj;
+
+	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+
+out_sobj:
+	iommufd_put_object(ucmd->ictx, &sobj->obj);
+	return rc;
+}
+
+static int iommufd_test_pasid_detach(struct iommufd_ucmd *ucmd,
+ struct iommu_test_cmd *cmd)
+{
+ struct selftest_obj *sobj;
+
+ sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id);
+ if (IS_ERR(sobj))
+ return PTR_ERR(sobj);
+
+ iommufd_device_detach(sobj->idev.idev, cmd->pasid_detach.pasid);
+ iommufd_put_object(ucmd->ictx, &sobj->obj);
+ return 0;
+}
+
void iommufd_selftest_destroy(struct iommufd_object *obj)
{
struct selftest_obj *sobj = to_selftest_obj(obj);
switch (sobj->type) {
case TYPE_IDEV:
- iommufd_device_detach(sobj->idev.idev);
+ iommufd_device_detach(sobj->idev.idev, IOMMU_NO_PASID);
iommufd_device_unbind(sobj->idev.idev);
mock_dev_destroy(sobj->idev.mock_dev);
break;
@@ -1678,6 +1920,16 @@ int iommufd_test(struct iommufd_ucmd *ucmd)
cmd->dirty.flags);
case IOMMU_TEST_OP_TRIGGER_IOPF:
return iommufd_test_trigger_iopf(ucmd, cmd);
+ case IOMMU_TEST_OP_TRIGGER_VEVENT:
+ return iommufd_test_trigger_vevent(ucmd, cmd);
+ case IOMMU_TEST_OP_PASID_ATTACH:
+ return iommufd_test_pasid_attach(ucmd, cmd);
+ case IOMMU_TEST_OP_PASID_REPLACE:
+ return iommufd_test_pasid_replace(ucmd, cmd);
+ case IOMMU_TEST_OP_PASID_DETACH:
+ return iommufd_test_pasid_detach(ucmd, cmd);
+ case IOMMU_TEST_OP_PASID_CHECK_HWPT:
+ return iommufd_test_pasid_check_hwpt(ucmd, cmd);
default:
return -EOPNOTSUPP;
}
@@ -1724,6 +1976,7 @@ int __init iommufd_test_init(void)
init_completion(&mock_iommu.complete);
mock_iommu_iopf_queue = iopf_queue_alloc("mock-iopfq");
+ mock_iommu.iommu_dev.max_pasids = (1 << MOCK_PASID_WIDTH);
return 0;
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
index 69b88e8c7c26..01df2b985f02 100644
--- a/drivers/iommu/iommufd/viommu.c
+++ b/drivers/iommu/iommufd/viommu.c
@@ -59,6 +59,8 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
viommu->ictx = ucmd->ictx;
viommu->hwpt = hwpt_paging;
refcount_inc(&viommu->hwpt->common.obj.users);
+ INIT_LIST_HEAD(&viommu->veventqs);
+ init_rwsem(&viommu->veventqs_rwsem);
/*
* It is the most likely case that a physical IOMMU is unpluggable. A
* pluggable IOMMU instance (if exists) is responsible for refcounting
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 074daf1aac4e..e424b279a8cd 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -1081,31 +1081,24 @@ static int ipmmu_probe(struct platform_device *pdev)
}
}
+ platform_set_drvdata(pdev, mmu);
/*
* Register the IPMMU to the IOMMU subsystem in the following cases:
* - R-Car Gen2 IPMMU (all devices registered)
* - R-Car Gen3 IPMMU (leaf devices only - skip root IPMMU-MM device)
*/
- if (!mmu->features->has_cache_leaf_nodes || !ipmmu_is_root(mmu)) {
- ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL,
- dev_name(&pdev->dev));
- if (ret)
- return ret;
-
- ret = iommu_device_register(&mmu->iommu, &ipmmu_ops, &pdev->dev);
- if (ret)
- return ret;
- }
+ if (mmu->features->has_cache_leaf_nodes && ipmmu_is_root(mmu))
+ return 0;
- /*
- * We can't create the ARM mapping here as it requires the bus to have
- * an IOMMU, which only happens when bus_set_iommu() is called in
- * ipmmu_init() after the probe function returns.
- */
+ ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, dev_name(&pdev->dev));
+ if (ret)
+ return ret;
- platform_set_drvdata(pdev, mmu);
+ ret = iommu_device_register(&mmu->iommu, &ipmmu_ops, &pdev->dev);
+ if (ret)
+ iommu_device_sysfs_remove(&mmu->iommu);
- return 0;
+ return ret;
}
static void ipmmu_remove(struct platform_device *pdev)
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 034b0e670384..df98d0c65f54 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -1372,15 +1372,6 @@ static int mtk_iommu_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, data);
mutex_init(&data->mutex);
- ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
- "mtk-iommu.%pa", &ioaddr);
- if (ret)
- goto out_link_remove;
-
- ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
- if (ret)
- goto out_sysfs_remove;
-
if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) {
list_add_tail(&data->list, data->plat_data->hw_list);
data->hw_list = data->plat_data->hw_list;
@@ -1390,19 +1381,28 @@ static int mtk_iommu_probe(struct platform_device *pdev)
data->hw_list = &data->hw_list_head;
}
+ ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
+ "mtk-iommu.%pa", &ioaddr);
+ if (ret)
+ goto out_list_del;
+
+ ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
+ if (ret)
+ goto out_sysfs_remove;
+
if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
if (ret)
- goto out_list_del;
+ goto out_device_unregister;
}
return ret;
-out_list_del:
- list_del(&data->list);
+out_device_unregister:
iommu_device_unregister(&data->iommu);
out_sysfs_remove:
iommu_device_sysfs_remove(&data->iommu);
-out_link_remove:
+out_list_del:
+ list_del(&data->list);
if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM))
device_link_remove(data->smicomm_dev, dev);
out_runtime_disable: