diff options
author | Jack Morgenstein <jackm@dev.mellanox.co.il> | 2016-09-12 19:16:20 +0300 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2016-10-07 15:23:47 +0200 |
commit | 6bec514a1e08e1949835fab6744c3fd0a7655273 (patch) | |
tree | d901f3aeea71113867722f4007363bc36533d46d | |
parent | 061a9f3e88e4fa4a2d9ecb1d2fd2975eec51008f (diff) | |
download | lwn-6bec514a1e08e1949835fab6744c3fd0a7655273.tar.gz lwn-6bec514a1e08e1949835fab6744c3fd0a7655273.zip |
IB/mlx4: Use correct subnet-prefix in QP1 mads under SR-IOV
commit 8ec07bf8a8b57d6c58927a16a0a22c0115cf2855 upstream.
When sending QP1 MAD packets which use a GRH, the source GID
(which consists of the 64-bit subnet prefix, and the 64 bit port GUID)
must be included in the packet GRH.
For SR-IOV, a GID cache is used, since the source GID needs to be the
slave's source GID, and not the Hypervisor's GID. This cache also
included a subnet_prefix. Unfortunately, the subnet_prefix field in
the cache was never initialized (to the default subnet prefix 0xfe80::0).
As a result, this field remained all zeroes. Therefore, when SR-IOV
was active, all QP1 packets which included a GRH had a source GID
subnet prefix of all-zeroes.
However, the subnet-prefix should initially be 0xfe80::0 (the default
subnet prefix). In addition, if OpenSM modifies a port's subnet prefix,
the new subnet prefix must be used in the GRH when sending QP1 packets.
To fix this we now initialize the subnet prefix in the SR-IOV GID cache
to the default subnet prefix. We update the cached value if/when OpenSM
modifies the port's subnet prefix. We take this cached value when sending
QP1 packets when SR-IOV is active.
Note that the value is stored as an atomic64. This eliminates any need
for locking when the subnet prefix is being updated.
Note also that we depend on the FW generating the "port management change"
event for tracking subnet-prefix changes performed by OpenSM. If running
early FW (before 2.9.4630), subnet prefix changes will not be tracked (but
the default subnet prefix still will be stored in the cache; therefore
users who do not modify the subnet prefix will not have a problem).
IF there is a need for such tracking also for early FW, we will add that
capability in a subsequent patch.
Fixes: 1ffeb2eb8be9 ("IB/mlx4: SR-IOV IB context objects and proxy/tunnel SQP support")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- | drivers/infiniband/hw/mlx4/mad.c | 23 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/qp.c | 5 |
3 files changed, 27 insertions, 3 deletions
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 05179f47bbde..d862b9b7910e 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1080,6 +1080,27 @@ void handle_port_mgmt_change_event(struct work_struct *work) /* Generate GUID changed event */ if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) { + if (mlx4_is_master(dev->dev)) { + union ib_gid gid; + int err = 0; + + if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix) + err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1); + else + gid.global.subnet_prefix = + eqe->event.port_mgmt_change.params.port_info.gid_prefix; + if (err) { + pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n", + port, err); + } else { + pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n", + port, + (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix), + be64_to_cpu(gid.global.subnet_prefix)); + atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix, + be64_to_cpu(gid.global.subnet_prefix)); + } + } mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); /*if master, notify all slaves*/ if (mlx4_is_master(dev->dev)) @@ -2154,6 +2175,8 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev) if (err) goto demux_err; dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id; + atomic64_set(&dev->sriov.demux[i].subnet_prefix, + be64_to_cpu(gid.global.subnet_prefix)); err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1, &dev->sriov.sqps[i]); if (err) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 1caa11edac03..78f29e91653a 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -441,7 +441,7 @@ struct mlx4_ib_demux_ctx { struct workqueue_struct *wq; struct workqueue_struct *ud_wq; spinlock_t ud_lock; - __be64 subnet_prefix; + atomic64_t subnet_prefix; __be64 guid_cache[128]; struct mlx4_ib_dev *dev; /* the following lock protects both mcg_table and mcg_mgid0_list */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index b2870995fb99..f350f2d61c15 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2340,8 +2340,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, * we must use our own cache */ sqp->ud_header.grh.source_gid.global.subnet_prefix = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - subnet_prefix; + cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov. + demux[sqp->qp.port - 1]. + subnet_prefix))); sqp->ud_header.grh.source_gid.global.interface_id = to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. guid_cache[ah->av.ib.gid_index]; |