summaryrefslogtreecommitdiff
path: root/drivers/vfio
diff options
context:
space:
mode:
authorYishai Hadas <yishaih@nvidia.com>2023-01-24 16:49:55 +0200
committerAlex Williamson <alex.williamson@redhat.com>2023-01-30 12:16:15 -0700
commitf4f0c25e5d726e353369258b5ce28a01bd71aa47 (patch)
treed503f64863fd58face6ffaa9c2595bcfae466323 /drivers/vfio
parentb04e2e86e919633825728007484508dbdc44c7bb (diff)
downloadlwn-f4f0c25e5d726e353369258b5ce28a01bd71aa47.tar.gz
lwn-f4f0c25e5d726e353369258b5ce28a01bd71aa47.zip
vfio/mlx5: Improve the target side flow to reduce downtime
Improve the target side flow to reduce downtime as of below. - Support reading an optional record which includes the expected stop_copy size. - Once the source sends this record data, which expects to be sent as part of the pre_copy flow, prepare the data buffers that may be large enough to hold the final stop_copy data. The above reduces the migration downtime as the relevant stuff that is needed to load the image data is prepared ahead as part of pre_copy. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Link: https://lore.kernel.org/r/20230124144955.139901-4-yishaih@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Diffstat (limited to 'drivers/vfio')
-rw-r--r--drivers/vfio/pci/mlx5/cmd.h6
-rw-r--r--drivers/vfio/pci/mlx5/main.c111
2 files changed, 105 insertions, 12 deletions
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 8f1bef580028..aec4c69dd6c1 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -27,6 +27,8 @@ enum mlx5_vf_migf_state {
enum mlx5_vf_load_state {
MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER,
MLX5_VF_LOAD_STATE_READ_HEADER,
+ MLX5_VF_LOAD_STATE_PREP_HEADER_DATA,
+ MLX5_VF_LOAD_STATE_READ_HEADER_DATA,
MLX5_VF_LOAD_STATE_PREP_IMAGE,
MLX5_VF_LOAD_STATE_READ_IMAGE,
MLX5_VF_LOAD_STATE_LOAD_IMAGE,
@@ -59,7 +61,6 @@ struct mlx5_vhca_data_buffer {
loff_t start_pos;
u64 length;
u64 allocated_length;
- u64 header_image_size;
u32 mkey;
enum dma_data_direction dma_dir;
u8 dmaed:1;
@@ -89,6 +90,9 @@ struct mlx5_vf_migration_file {
enum mlx5_vf_load_state load_state;
u32 pdn;
loff_t max_pos;
+ u64 record_size;
+ u32 record_tag;
+ u64 stop_copy_prep_size;
u64 pre_copy_initial_bytes;
struct mlx5_vhca_data_buffer *buf;
struct mlx5_vhca_data_buffer *buf_header;
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 6856e7b97533..e897537a9e8a 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -704,6 +704,56 @@ mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf,
}
static int
+mlx5vf_resume_read_header_data(struct mlx5_vf_migration_file *migf,
+ struct mlx5_vhca_data_buffer *vhca_buf,
+ const char __user **buf, size_t *len,
+ loff_t *pos, ssize_t *done)
+{
+ size_t copy_len, to_copy;
+ size_t required_data;
+ u8 *to_buff;
+ int ret;
+
+ required_data = migf->record_size - vhca_buf->length;
+ to_copy = min_t(size_t, *len, required_data);
+ copy_len = to_copy;
+ while (to_copy) {
+ ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, &to_copy, pos,
+ done);
+ if (ret)
+ return ret;
+ }
+
+ *len -= copy_len;
+ if (vhca_buf->length == migf->record_size) {
+ switch (migf->record_tag) {
+ case MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE:
+ {
+ struct page *page;
+
+ page = mlx5vf_get_migration_page(vhca_buf, 0);
+ if (!page)
+ return -EINVAL;
+ to_buff = kmap_local_page(page);
+ migf->stop_copy_prep_size = min_t(u64,
+ le64_to_cpup((__le64 *)to_buff), MAX_LOAD_SIZE);
+ kunmap_local(to_buff);
+ break;
+ }
+ default:
+ /* Optional tag */
+ break;
+ }
+
+ migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
+ migf->max_pos += migf->record_size;
+ vhca_buf->length = 0;
+ }
+
+ return 0;
+}
+
+static int
mlx5vf_resume_read_header(struct mlx5_vf_migration_file *migf,
struct mlx5_vhca_data_buffer *vhca_buf,
const char __user **buf,
@@ -733,23 +783,38 @@ mlx5vf_resume_read_header(struct mlx5_vf_migration_file *migf,
*len -= copy_len;
vhca_buf->length += copy_len;
if (vhca_buf->length == sizeof(struct mlx5_vf_migration_header)) {
- u64 flags;
+ u64 record_size;
+ u32 flags;
- vhca_buf->header_image_size = le64_to_cpup((__le64 *)to_buff);
- if (vhca_buf->header_image_size > MAX_LOAD_SIZE) {
+ record_size = le64_to_cpup((__le64 *)to_buff);
+ if (record_size > MAX_LOAD_SIZE) {
ret = -ENOMEM;
goto end;
}
- flags = le64_to_cpup((__le64 *)(to_buff +
+ migf->record_size = record_size;
+ flags = le32_to_cpup((__le32 *)(to_buff +
offsetof(struct mlx5_vf_migration_header, flags)));
- if (flags) {
- ret = -EOPNOTSUPP;
- goto end;
+ migf->record_tag = le32_to_cpup((__le32 *)(to_buff +
+ offsetof(struct mlx5_vf_migration_header, tag)));
+ switch (migf->record_tag) {
+ case MLX5_MIGF_HEADER_TAG_FW_DATA:
+ migf->load_state = MLX5_VF_LOAD_STATE_PREP_IMAGE;
+ break;
+ case MLX5_MIGF_HEADER_TAG_STOP_COPY_SIZE:
+ migf->load_state = MLX5_VF_LOAD_STATE_PREP_HEADER_DATA;
+ break;
+ default:
+ if (!(flags & MLX5_MIGF_HEADER_FLAGS_TAG_OPTIONAL)) {
+ ret = -EOPNOTSUPP;
+ goto end;
+ }
+ /* We may read and skip this optional record data */
+ migf->load_state = MLX5_VF_LOAD_STATE_PREP_HEADER_DATA;
}
- migf->load_state = MLX5_VF_LOAD_STATE_PREP_IMAGE;
migf->max_pos += vhca_buf->length;
+ vhca_buf->length = 0;
*has_work = true;
}
end:
@@ -793,9 +858,34 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
if (ret)
goto out_unlock;
break;
+ case MLX5_VF_LOAD_STATE_PREP_HEADER_DATA:
+ if (vhca_buf_header->allocated_length < migf->record_size) {
+ mlx5vf_free_data_buffer(vhca_buf_header);
+
+ migf->buf_header = mlx5vf_alloc_data_buffer(migf,
+ migf->record_size, DMA_NONE);
+ if (IS_ERR(migf->buf_header)) {
+ ret = PTR_ERR(migf->buf_header);
+ migf->buf_header = NULL;
+ goto out_unlock;
+ }
+
+ vhca_buf_header = migf->buf_header;
+ }
+
+ vhca_buf_header->start_pos = migf->max_pos;
+ migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER_DATA;
+ break;
+ case MLX5_VF_LOAD_STATE_READ_HEADER_DATA:
+ ret = mlx5vf_resume_read_header_data(migf, vhca_buf_header,
+ &buf, &len, pos, &done);
+ if (ret)
+ goto out_unlock;
+ break;
case MLX5_VF_LOAD_STATE_PREP_IMAGE:
{
- u64 size = vhca_buf_header->header_image_size;
+ u64 size = max(migf->record_size,
+ migf->stop_copy_prep_size);
if (vhca_buf->allocated_length < size) {
mlx5vf_free_data_buffer(vhca_buf);
@@ -824,7 +914,7 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
break;
case MLX5_VF_LOAD_STATE_READ_IMAGE:
ret = mlx5vf_resume_read_image(migf, vhca_buf,
- vhca_buf_header->header_image_size,
+ migf->record_size,
&buf, &len, pos, &done, &has_work);
if (ret)
goto out_unlock;
@@ -837,7 +927,6 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
/* prep header buf for next image */
vhca_buf_header->length = 0;
- vhca_buf_header->header_image_size = 0;
/* prep data buf for next image */
vhca_buf->length = 0;