int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
size_t *state_size, u64 *total_size,
- u8 query_flags)
+ u8 *mig_state, u8 query_flags)
{
u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
MLX5_GET64(query_vhca_migration_state_out, out,
remaining_total_size) : *state_size;
+ if (mig_state && mvdev->mig_state_cap)
+ *mig_state = MLX5_GET(query_vhca_migration_state_out, out,
+ migration_state);
+
return 0;
}
if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks))
mvdev->chunk_mode = 1;
+ if (MLX5_CAP_GEN_2(mvdev->mdev, migration_state))
+ mvdev->mig_state_cap = 1;
+
end:
mlx5_vf_put_core_dev(mvdev->mdev);
}
{
spin_lock_irq(&buf->migf->list_lock);
buf->stop_copy_chunk_num = 0;
+ buf->pre_copy_init_bytes_chunk = false;
list_add_tail(&buf->buf_elm, &buf->migf->avail_list);
spin_unlock_irq(&buf->migf->list_lock);
}
!next_required_umem_size;
if (async_data->header_buf) {
status = add_buf_header(async_data->header_buf, image_size,
- initial_pre_copy);
+ initial_pre_copy ||
+ async_data->buf->pre_copy_init_bytes_chunk);
if (status)
goto err;
}
}
}
spin_unlock_irqrestore(&migf->list_lock, flags);
- if (initial_pre_copy) {
+ if (initial_pre_copy || async_data->buf->pre_copy_init_bytes_chunk) {
migf->pre_copy_initial_bytes += image_size;
- migf->state = MLX5_MIGF_STATE_PRE_COPY;
+ if (initial_pre_copy)
+ migf->state = MLX5_MIGF_STATE_PRE_COPY;
+ if (async_data->buf->pre_copy_init_bytes_chunk)
+ async_data->buf->pre_copy_init_bytes_chunk = false;
}
if (stop_copy_last_chunk)
migf->state = MLX5_MIGF_STATE_COMPLETE;
u32 *mkey_in;
enum dma_data_direction dma_dir;
u8 stop_copy_chunk_num;
+ bool pre_copy_init_bytes_chunk;
struct list_head buf_elm;
struct mlx5_vf_migration_file *migf;
};
u32 record_tag;
u64 stop_copy_prep_size;
u64 pre_copy_initial_bytes;
+ u64 pre_copy_initial_bytes_start;
size_t next_required_umem_size;
u8 num_ready_chunks;
/* Upon chunk mode preserve another set of buffers for stop_copy phase */
u8 mdev_detach:1;
u8 log_active:1;
u8 chunk_mode:1;
+ u8 mig_state_cap:1;
struct completion tracker_comp;
/* protect migration state */
struct mutex state_mutex;
int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
size_t *state_size, u64 *total_size,
- u8 query_flags);
+ u8 *migration_state, u8 query_flags);
void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
const struct vfio_migration_ops *mig_ops,
const struct vfio_log_ops *log_ops);
struct mlx5_vhca_data_buffer *buf;
struct vfio_precopy_info info = {};
loff_t *pos = &filp->f_pos;
+ u8 migration_state = 0;
size_t inc_length = 0;
- bool end_of_data = false;
+ bool reinit_state;
+ bool end_of_data;
int ret;
ret = vfio_check_precopy_ioctl(&mvdev->core_device.vdev, cmd, arg,
* As so, the other code below is safe with the proper locks.
*/
ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &inc_length,
- NULL, MLX5VF_QUERY_INC);
+ NULL, &migration_state,
+ MLX5VF_QUERY_INC);
if (ret)
goto err_state_unlock;
}
goto err_migf_unlock;
}
- if (migf->pre_copy_initial_bytes > *pos) {
- info.initial_bytes = migf->pre_copy_initial_bytes - *pos;
+ /*
+ * Opting in to VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2 also
+ * serves as the opt-in for VFIO_PRECOPY_INFO_REINIT.
+ */
+ reinit_state = mvdev->core_device.vdev.precopy_info_v2 &&
+ migration_state == MLX5_QUERY_VHCA_MIG_STATE_OPER_MIGRATION_INIT;
+ end_of_data = !(migf->max_pos - *pos);
+ if (reinit_state) {
+ /*
+ * Any bytes already present in memory are treated as initial
+ * bytes, since the caller is required to read them before
+ * reaching the new initial-bytes region.
+ */
+ migf->pre_copy_initial_bytes_start = *pos;
+ migf->pre_copy_initial_bytes = migf->max_pos - *pos;
+ info.initial_bytes = migf->pre_copy_initial_bytes + inc_length;
+ info.flags |= VFIO_PRECOPY_INFO_REINIT;
} else {
- info.dirty_bytes = migf->max_pos - *pos;
- if (!info.dirty_bytes)
- end_of_data = true;
- info.dirty_bytes += inc_length;
+ if (migf->pre_copy_initial_bytes_start +
+ migf->pre_copy_initial_bytes > *pos) {
+ WARN_ON_ONCE(end_of_data);
+ info.initial_bytes = migf->pre_copy_initial_bytes_start +
+ migf->pre_copy_initial_bytes - *pos;
+ } else {
+ info.dirty_bytes = (migf->max_pos - *pos) + inc_length;
+ }
}
+ mutex_unlock(&migf->lock);
- if (!end_of_data || !inc_length) {
- mutex_unlock(&migf->lock);
- goto done;
- }
+ if ((reinit_state || end_of_data) && inc_length) {
+ /*
+ * If we have finished transferring the current state and the
+ * device has a dirty state, or the device has a new init
+ * state, save a new state so it is ready to be read.
+ */
+ buf = mlx5vf_get_data_buffer(migf, DIV_ROUND_UP(inc_length, PAGE_SIZE),
+ DMA_FROM_DEVICE);
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
+ mlx5vf_mark_err(migf);
+ goto err_state_unlock;
+ }
- mutex_unlock(&migf->lock);
- /*
- * We finished transferring the current state and the device has a
- * dirty state, save a new state to be ready for.
- */
- buf = mlx5vf_get_data_buffer(migf, DIV_ROUND_UP(inc_length, PAGE_SIZE),
- DMA_FROM_DEVICE);
- if (IS_ERR(buf)) {
- ret = PTR_ERR(buf);
- mlx5vf_mark_err(migf);
- goto err_state_unlock;
- }
+ buf->pre_copy_init_bytes_chunk = reinit_state;
+ ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, true);
+ if (ret) {
+ mlx5vf_mark_err(migf);
+ mlx5vf_put_data_buffer(buf);
+ goto err_state_unlock;
+ }
- ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, true);
- if (ret) {
- mlx5vf_mark_err(migf);
- mlx5vf_put_data_buffer(buf);
- goto err_state_unlock;
+ /*
+ * SAVE appends a header record via add_buf_header(),
+ * so account for it as well.
+ */
+ if (reinit_state)
+ info.initial_bytes += sizeof(struct mlx5_vf_migration_header);
+ else
+ info.dirty_bytes += sizeof(struct mlx5_vf_migration_header);
}
-done:
mlx5vf_state_mutex_unlock(mvdev);
if (copy_to_user((void __user *)arg, &info,
offsetofend(struct vfio_precopy_info, dirty_bytes)))
if (migf->state == MLX5_MIGF_STATE_ERROR)
return -ENODEV;
- ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, NULL,
+ ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, NULL, NULL,
MLX5VF_QUERY_INC | MLX5VF_QUERY_FINAL);
if (ret)
goto err;
if (ret)
goto out;
- ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, &full_size, 0);
+ ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, &full_size, NULL, 0);
if (ret)
goto out_pd;
enum mlx5_vf_migf_state state;
size_t size;
- ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &size, NULL,
+ ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &size, NULL, NULL,
MLX5VF_QUERY_INC | MLX5VF_QUERY_CLEANUP);
if (ret)
return ERR_PTR(ret);
mutex_lock(&mvdev->state_mutex);
ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &state_size,
- &total_size, 0);
+ &total_size, NULL, 0);
if (!ret)
*stop_copy_length = total_size;
mlx5vf_state_mutex_unlock(mvdev);