diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 103 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 6 |
3 files changed, 119 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index c143f242a84d..64e3b4e3a712 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2766,7 +2766,17 @@ static int criu_resume(struct file *filep, } mutex_lock(&target->mutex); + ret = kfd_criu_resume_svm(target); + if (ret) { + pr_err("kfd_criu_resume_svm failed for %i\n", args->pid); + goto exit; + } + ret = amdgpu_amdkfd_criu_resume(target->kgd_process_info); + if (ret) + pr_err("amdgpu_amdkfd_criu_resume failed for %i\n", args->pid); + +exit: mutex_unlock(&target->mutex); kfd_unref_process(target); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 41ac049b3316..41f03d165bad 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -3487,6 +3487,109 @@ fill_values: return 0; } +int kfd_criu_resume_svm(struct kfd_process *p) +{ + struct kfd_ioctl_svm_attribute *set_attr_new, *set_attr = NULL; + int nattr_common = 4, nattr_accessibility = 1; + struct criu_svm_metadata *criu_svm_md = NULL; + struct svm_range_list *svms = &p->svms; + struct criu_svm_metadata *next = NULL; + uint32_t set_flags = 0xffffffff; + int i, j, num_attrs, ret = 0; + uint64_t set_attr_size; + struct mm_struct *mm; + + if (list_empty(&svms->criu_svm_metadata_list)) { + pr_debug("No SVM data from CRIU restore stage 2\n"); + return ret; + } + + mm = get_task_mm(p->lead_thread); + if (!mm) { + pr_err("failed to get mm for the target process\n"); + return -ESRCH; + } + + num_attrs = nattr_common + (nattr_accessibility * p->n_pdds); + + i = j = 0; + list_for_each_entry(criu_svm_md, &svms->criu_svm_metadata_list, list) { + pr_debug("criu_svm_md[%d]\n\tstart: 0x%llx size: 0x%llx (npages)\n", + i, criu_svm_md->data.start_addr, criu_svm_md->data.size); + + for (j = 0; j < num_attrs; j++) { + pr_debug("\ncriu_svm_md[%d]->attrs[%d].type : 0x%x \ncriu_svm_md[%d]->attrs[%d].value : 0x%x\n", + i, j, criu_svm_md->data.attrs[j].type, + i, j, criu_svm_md->data.attrs[j].value); + switch (criu_svm_md->data.attrs[j].type) { + /* During Checkpoint operation, the query for + * KFD_IOCTL_SVM_ATTR_PREFETCH_LOC attribute might + * return KFD_IOCTL_SVM_LOCATION_UNDEFINED if they were + * not used by the range which was checkpointed. Care + * must be taken to not restore with an invalid value + * otherwise the gpuidx value will be invalid and + * set_attr would eventually fail so just replace those + * with another dummy attribute such as + * KFD_IOCTL_SVM_ATTR_SET_FLAGS. + */ + case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: + if (criu_svm_md->data.attrs[j].value == + KFD_IOCTL_SVM_LOCATION_UNDEFINED) { + criu_svm_md->data.attrs[j].type = + KFD_IOCTL_SVM_ATTR_SET_FLAGS; + criu_svm_md->data.attrs[j].value = 0; + } + break; + case KFD_IOCTL_SVM_ATTR_SET_FLAGS: + set_flags = criu_svm_md->data.attrs[j].value; + break; + default: + break; + } + } + + /* CLR_FLAGS is not available via get_attr during checkpoint but + * it needs to be inserted before restoring the ranges so + * allocate extra space for it before calling set_attr + */ + set_attr_size = sizeof(struct kfd_ioctl_svm_attribute) * + (num_attrs + 1); + set_attr_new = krealloc(set_attr, set_attr_size, + GFP_KERNEL); + if (!set_attr_new) { + ret = -ENOMEM; + goto exit; + } + set_attr = set_attr_new; + + memcpy(set_attr, criu_svm_md->data.attrs, num_attrs * + sizeof(struct kfd_ioctl_svm_attribute)); + set_attr[num_attrs].type = KFD_IOCTL_SVM_ATTR_CLR_FLAGS; + set_attr[num_attrs].value = ~set_flags; + + ret = svm_range_set_attr(p, mm, criu_svm_md->data.start_addr, + criu_svm_md->data.size, num_attrs + 1, + set_attr); + if (ret) { + pr_err("CRIU: failed to set range attributes\n"); + goto exit; + } + + i++; + } +exit: + kfree(set_attr); + list_for_each_entry_safe(criu_svm_md, next, &svms->criu_svm_metadata_list, list) { + pr_debug("freeing criu_svm_md[]\n\tstart: 0x%llx\n", + criu_svm_md->data.start_addr); + kfree(criu_svm_md); + } + + mmput(mm); + return ret; + +} + int kfd_criu_restore_svm(struct kfd_process *p, uint8_t __user *user_priv_ptr, uint64_t *priv_data_offset, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 3b5948f67de2..66c77f00ac3e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -192,6 +192,7 @@ int kfd_criu_restore_svm(struct kfd_process *p, uint8_t __user *user_priv_ptr, uint64_t *priv_data_offset, uint64_t max_priv_data_size); +int kfd_criu_resume_svm(struct kfd_process *p); struct kfd_process_device * svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev); void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_struct *mm); @@ -253,6 +254,11 @@ static inline int kfd_criu_restore_svm(struct kfd_process *p, return -EINVAL; } +static inline int kfd_criu_resume_svm(struct kfd_process *p) +{ + return 0; +} + #define KFD_IS_SVM_API_SUPPORTED(dev) false #endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */ |